From 8665bd53f2f2e27e5511d90428cb3f60e6d0ce15 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 20:50:12 +0200 Subject: Merging upstream version 6.8.9. Signed-off-by: Daniel Baumann --- Documentation/gpu/amdgpu/apu-asic-info-table.csv | 5 +- Documentation/gpu/amdgpu/display/dc-debug.rst | 41 ++ .../gpu/amdgpu/display/trace-groups-table.csv | 29 + Documentation/gpu/automated_testing.rst | 7 +- Documentation/gpu/driver-uapi.rst | 5 + Documentation/gpu/drivers.rst | 3 + Documentation/gpu/drm-kms-helpers.rst | 6 + Documentation/gpu/drm-kms.rst | 6 + Documentation/gpu/drm-mm.rst | 10 + Documentation/gpu/drm-vm-bind-locking.rst | 582 +++++++++++++++++++++ Documentation/gpu/imagination/index.rst | 13 + Documentation/gpu/imagination/uapi.rst | 171 ++++++ Documentation/gpu/implementation_guidelines.rst | 1 + Documentation/gpu/rfc/xe.rst | 132 ++--- Documentation/gpu/todo.rst | 40 ++ Documentation/gpu/xe/index.rst | 25 + Documentation/gpu/xe/xe_cs.rst | 8 + Documentation/gpu/xe/xe_debugging.rst | 7 + Documentation/gpu/xe/xe_firmware.rst | 37 ++ Documentation/gpu/xe/xe_gt_mcr.rst | 13 + Documentation/gpu/xe/xe_map.rst | 8 + Documentation/gpu/xe/xe_migrate.rst | 8 + Documentation/gpu/xe/xe_mm.rst | 14 + Documentation/gpu/xe/xe_pcode.rst | 14 + Documentation/gpu/xe/xe_pm.rst | 14 + Documentation/gpu/xe/xe_rtp.rst | 20 + Documentation/gpu/xe/xe_tile.rst | 14 + Documentation/gpu/xe/xe_wa.rst | 14 + 28 files changed, 1177 insertions(+), 70 deletions(-) create mode 100644 Documentation/gpu/amdgpu/display/trace-groups-table.csv create mode 100644 Documentation/gpu/drm-vm-bind-locking.rst create mode 100644 Documentation/gpu/imagination/index.rst create mode 100644 Documentation/gpu/imagination/uapi.rst create mode 100644 Documentation/gpu/xe/index.rst create mode 100644 Documentation/gpu/xe/xe_cs.rst create mode 100644 Documentation/gpu/xe/xe_debugging.rst create mode 100644 Documentation/gpu/xe/xe_firmware.rst create mode 100644 Documentation/gpu/xe/xe_gt_mcr.rst create mode 100644 Documentation/gpu/xe/xe_map.rst create mode 100644 Documentation/gpu/xe/xe_migrate.rst create mode 100644 Documentation/gpu/xe/xe_mm.rst create mode 100644 Documentation/gpu/xe/xe_pcode.rst create mode 100644 Documentation/gpu/xe/xe_pm.rst create mode 100644 Documentation/gpu/xe/xe_rtp.rst create mode 100644 Documentation/gpu/xe/xe_tile.rst create mode 100644 Documentation/gpu/xe/xe_wa.rst (limited to 'Documentation/gpu') diff --git a/Documentation/gpu/amdgpu/apu-asic-info-table.csv b/Documentation/gpu/amdgpu/apu-asic-info-table.csv index 2e76b427b..18868abe2 100644 --- a/Documentation/gpu/amdgpu/apu-asic-info-table.csv +++ b/Documentation/gpu/amdgpu/apu-asic-info-table.csv @@ -7,6 +7,7 @@ SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0 Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1 Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3 Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5 -Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5 +Ryzen 7x45 series (FL1), Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5 Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8 -Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11 \ No newline at end of file +Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11 +Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11 diff --git a/Documentation/gpu/amdgpu/display/dc-debug.rst b/Documentation/gpu/amdgpu/display/dc-debug.rst index 40c55a618..817631b1d 100644 --- a/Documentation/gpu/amdgpu/display/dc-debug.rst +++ b/Documentation/gpu/amdgpu/display/dc-debug.rst @@ -75,3 +75,44 @@ change in real-time by using something like:: When reporting a bug related to DC, consider attaching this log before and after you reproduce the bug. + +DMUB Firmware Debug +=================== + +Sometimes, dmesg logs aren't enough. This is especially true if a feature is +implemented primarily in DMUB firmware. In such cases, all we see in dmesg when +an issue arises is some generic timeout error. So, to get more relevant +information, we can trace DMUB commands by enabling the relevant bits in +`amdgpu_dm_dmub_trace_mask`. + +Currently, we support the tracing of the following groups: + +Trace Groups +------------ + +.. csv-table:: + :header-rows: 1 + :widths: 1, 1 + :file: ./trace-groups-table.csv + +**Note: Not all ASICs support all of the listed trace groups** + +So, to enable just PSR tracing you can use the following command:: + + # echo 0x8020 > /sys/kernel/debug/dri/0/amdgpu_dm_dmub_trace_mask + +Then, you need to enable logging trace events to the buffer, which you can do +using the following:: + + # echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en + +Lastly, after you are able to reproduce the issue you are trying to debug, +you can disable tracing and read the trace log by using the following:: + + # echo 0 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en + # cat /sys/kernel/debug/dri/0/amdgpu_dm_dmub_tracebuffer + +So, when reporting bugs related to features such as PSR and ABM, consider +enabling the relevant bits in the mask before reproducing the issue and +attach the log that you obtain from the trace buffer in any bug reports that you +create. diff --git a/Documentation/gpu/amdgpu/display/trace-groups-table.csv b/Documentation/gpu/amdgpu/display/trace-groups-table.csv new file mode 100644 index 000000000..3f6a50d1d --- /dev/null +++ b/Documentation/gpu/amdgpu/display/trace-groups-table.csv @@ -0,0 +1,29 @@ +Name, Mask Value +INFO, 0x1 +IRQ SVC, 0x2 +VBIOS, 0x4 +REGISTER, 0x8 +PHY DBG, 0x10 +PSR, 0x20 +AUX, 0x40 +SMU, 0x80 +MALL, 0x100 +ABM, 0x200 +ALPM, 0x400 +TIMER, 0x800 +HW LOCK MGR, 0x1000 +INBOX1, 0x2000 +PHY SEQ, 0x4000 +PSR STATE, 0x8000 +ZSTATE, 0x10000 +TRANSMITTER CTL, 0x20000 +PANEL CNTL, 0x40000 +FAMS, 0x80000 +DPIA, 0x100000 +SUBVP, 0x200000 +INBOX0, 0x400000 +SDP, 0x4000000 +REPLAY, 0x8000000 +REPLAY RESIDENCY, 0x20000000 +CURSOR INFO, 0x80000000 +IPS, 0x100000000 diff --git a/Documentation/gpu/automated_testing.rst b/Documentation/gpu/automated_testing.rst index 240e29d5b..2d5a28866 100644 --- a/Documentation/gpu/automated_testing.rst +++ b/Documentation/gpu/automated_testing.rst @@ -69,14 +69,15 @@ the result. They will still be run. Each new flake entry must be associated with a link to the email reporting the bug to the author of the affected driver, the board name or Device Tree name of -the board, the first kernel version affected, and an approximation of the -failure rate. +the board, the first kernel version affected, the IGT version used for tests, +and an approximation of the failure rate. They should be provided under the following format:: # Bug Report: $LORE_OR_PATCHWORK_URL # Board Name: broken-board.dtb - # Version: 6.6-rc1 + # Linux Version: 6.6-rc1 + # IGT Version: 1.28-gd2af13d9f # Failure Rate: 100 flaky-test diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index c08bcbb95..e5070a0e9 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -17,3 +17,8 @@ VM_BIND / EXEC uAPI :doc: Overview .. kernel-doc:: include/uapi/drm/nouveau_drm.h + +drm/xe uAPI +=========== + +.. kernel-doc:: include/uapi/drm/xe_drm.h diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index 45a12e552..b899cbc5c 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -3,9 +3,11 @@ GPU Driver Documentation ======================== .. toctree:: + :maxdepth: 3 amdgpu/index i915 + imagination/index mcde meson pl111 @@ -16,6 +18,7 @@ GPU Driver Documentation vkms bridge/dw-hdmi xen-front + xe/index afbc komeda-kms panfrost diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index b748b8ae7..59cfe8a7a 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -363,6 +363,12 @@ EDID Helper Functions Reference .. kernel-doc:: drivers/gpu/drm/drm_edid.c :export: +.. kernel-doc:: include/drm/drm_eld.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_eld.c + :export: + SCDC Helper Functions Reference =============================== diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index a98a7e04e..13d3627d8 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -581,6 +581,12 @@ Variable Refresh Properties .. kernel-doc:: drivers/gpu/drm/drm_connector.c :doc: Variable refresh properties +Cursor Hotspot Properties +--------------------------- + +.. kernel-doc:: drivers/gpu/drm/drm_plane.c + :doc: hotspot properties + Existing KMS Properties ----------------------- diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index 602010cb6..d55751cad 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -466,6 +466,8 @@ DRM MM Range Allocator Function References .. kernel-doc:: drivers/gpu/drm/drm_mm.c :export: +.. _drm_gpuvm: + DRM GPUVM ========= @@ -481,6 +483,8 @@ Split and Merge .. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c :doc: Split and Merge +.. _drm_gpuvm_locking: + Locking ------- @@ -552,6 +556,12 @@ Overview .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c :doc: Overview +Flow Control +------------ + +.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c + :doc: Flow Control + Scheduler Function References ----------------------------- diff --git a/Documentation/gpu/drm-vm-bind-locking.rst b/Documentation/gpu/drm-vm-bind-locking.rst new file mode 100644 index 000000000..a345aa513 --- /dev/null +++ b/Documentation/gpu/drm-vm-bind-locking.rst @@ -0,0 +1,582 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +=============== +VM_BIND locking +=============== + +This document attempts to describe what's needed to get VM_BIND locking right, +including the userptr mmu_notifier locking. It also discusses some +optimizations to get rid of the looping through of all userptr mappings and +external / shared object mappings that is needed in the simplest +implementation. In addition, there is a section describing the VM_BIND locking +required for implementing recoverable pagefaults. + +The DRM GPUVM set of helpers +============================ + +There is a set of helpers for drivers implementing VM_BIND, and this +set of helpers implements much, but not all of the locking described +in this document. In particular, it is currently lacking a userptr +implementation. This document does not intend to describe the DRM GPUVM +implementation in detail, but it is covered in :ref:`its own +documentation `. It is highly recommended for any driver +implementing VM_BIND to use the DRM GPUVM helpers and to extend it if +common functionality is missing. + +Nomenclature +============ + +* ``gpu_vm``: Abstraction of a virtual GPU address space with + meta-data. Typically one per client (DRM file-private), or one per + execution context. +* ``gpu_vma``: Abstraction of a GPU address range within a gpu_vm with + associated meta-data. The backing storage of a gpu_vma can either be + a GEM object or anonymous or page-cache pages mapped also into the CPU + address space for the process. +* ``gpu_vm_bo``: Abstracts the association of a GEM object and + a VM. The GEM object maintains a list of gpu_vm_bos, where each gpu_vm_bo + maintains a list of gpu_vmas. +* ``userptr gpu_vma or just userptr``: A gpu_vma, whose backing store + is anonymous or page-cache pages as described above. +* ``revalidating``: Revalidating a gpu_vma means making the latest version + of the backing store resident and making sure the gpu_vma's + page-table entries point to that backing store. +* ``dma_fence``: A struct dma_fence that is similar to a struct completion + and which tracks GPU activity. When the GPU activity is finished, + the dma_fence signals. Please refer to the ``DMA Fences`` section of + the :doc:`dma-buf doc `. +* ``dma_resv``: A struct dma_resv (a.k.a reservation object) that is used + to track GPU activity in the form of multiple dma_fences on a + gpu_vm or a GEM object. The dma_resv contains an array / list + of dma_fences and a lock that needs to be held when adding + additional dma_fences to the dma_resv. The lock is of a type that + allows deadlock-safe locking of multiple dma_resvs in arbitrary + order. Please refer to the ``Reservation Objects`` section of the + :doc:`dma-buf doc `. +* ``exec function``: An exec function is a function that revalidates all + affected gpu_vmas, submits a GPU command batch and registers the + dma_fence representing the GPU command's activity with all affected + dma_resvs. For completeness, although not covered by this document, + it's worth mentioning that an exec function may also be the + revalidation worker that is used by some drivers in compute / + long-running mode. +* ``local object``: A GEM object which is only mapped within a + single VM. Local GEM objects share the gpu_vm's dma_resv. +* ``external object``: a.k.a shared object: A GEM object which may be shared + by multiple gpu_vms and whose backing storage may be shared with + other drivers. + +Locks and locking order +======================= + +One of the benefits of VM_BIND is that local GEM objects share the gpu_vm's +dma_resv object and hence the dma_resv lock. So, even with a huge +number of local GEM objects, only one lock is needed to make the exec +sequence atomic. + +The following locks and locking orders are used: + +* The ``gpu_vm->lock`` (optionally an rwsem). Protects the gpu_vm's + data structure keeping track of gpu_vmas. It can also protect the + gpu_vm's list of userptr gpu_vmas. With a CPU mm analogy this would + correspond to the mmap_lock. An rwsem allows several readers to walk + the VM tree concurrently, but the benefit of that concurrency most + likely varies from driver to driver. +* The ``userptr_seqlock``. This lock is taken in read mode for each + userptr gpu_vma on the gpu_vm's userptr list, and in write mode during mmu + notifier invalidation. This is not a real seqlock but described in + ``mm/mmu_notifier.c`` as a "Collision-retry read-side/write-side + 'lock' a lot like a seqcount. However this allows multiple + write-sides to hold it at once...". The read side critical section + is enclosed by ``mmu_interval_read_begin() / + mmu_interval_read_retry()`` with ``mmu_interval_read_begin()`` + sleeping if the write side is held. + The write side is held by the core mm while calling mmu interval + invalidation notifiers. +* The ``gpu_vm->resv`` lock. Protects the gpu_vm's list of gpu_vmas needing + rebinding, as well as the residency state of all the gpu_vm's local + GEM objects. + Furthermore, it typically protects the gpu_vm's list of evicted and + external GEM objects. +* The ``gpu_vm->userptr_notifier_lock``. This is an rwsem that is + taken in read mode during exec and write mode during a mmu notifier + invalidation. The userptr notifier lock is per gpu_vm. +* The ``gem_object->gpuva_lock`` This lock protects the GEM object's + list of gpu_vm_bos. This is usually the same lock as the GEM + object's dma_resv, but some drivers protects this list differently, + see below. +* The ``gpu_vm list spinlocks``. With some implementations they are needed + to be able to update the gpu_vm evicted- and external object + list. For those implementations, the spinlocks are grabbed when the + lists are manipulated. However, to avoid locking order violations + with the dma_resv locks, a special scheme is needed when iterating + over the lists. + +.. _gpu_vma lifetime: + +Protection and lifetime of gpu_vm_bos and gpu_vmas +================================================== + +The GEM object's list of gpu_vm_bos, and the gpu_vm_bo's list of gpu_vmas +is protected by the ``gem_object->gpuva_lock``, which is typically the +same as the GEM object's dma_resv, but if the driver +needs to access these lists from within a dma_fence signalling +critical section, it can instead choose to protect it with a +separate lock, which can be locked from within the dma_fence signalling +critical section. Such drivers then need to pay additional attention +to what locks need to be taken from within the loop when iterating +over the gpu_vm_bo and gpu_vma lists to avoid locking-order violations. + +The DRM GPUVM set of helpers provide lockdep asserts that this lock is +held in relevant situations and also provides a means of making itself +aware of which lock is actually used: :c:func:`drm_gem_gpuva_set_lock`. + +Each gpu_vm_bo holds a reference counted pointer to the underlying GEM +object, and each gpu_vma holds a reference counted pointer to the +gpu_vm_bo. When iterating over the GEM object's list of gpu_vm_bos and +over the gpu_vm_bo's list of gpu_vmas, the ``gem_object->gpuva_lock`` must +not be dropped, otherwise, gpu_vmas attached to a gpu_vm_bo may +disappear without notice since those are not reference-counted. A +driver may implement its own scheme to allow this at the expense of +additional complexity, but this is outside the scope of this document. + +In the DRM GPUVM implementation, each gpu_vm_bo and each gpu_vma +holds a reference count on the gpu_vm itself. Due to this, and to avoid circular +reference counting, cleanup of the gpu_vm's gpu_vmas must not be done from the +gpu_vm's destructor. Drivers typically implements a gpu_vm close +function for this cleanup. The gpu_vm close function will abort gpu +execution using this VM, unmap all gpu_vmas and release page-table memory. + +Revalidation and eviction of local objects +========================================== + +Note that in all the code examples given below we use simplified +pseudo-code. In particular, the dma_resv deadlock avoidance algorithm +as well as reserving memory for dma_resv fences is left out. + +Revalidation +____________ +With VM_BIND, all local objects need to be resident when the gpu is +executing using the gpu_vm, and the objects need to have valid +gpu_vmas set up pointing to them. Typically, each gpu command buffer +submission is therefore preceded with a re-validation section: + +.. code-block:: C + + dma_resv_lock(gpu_vm->resv); + + // Validation section starts here. + for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) { + validate_gem_bo(&gpu_vm_bo->gem_bo); + + // The following list iteration needs the Gem object's + // dma_resv to be held (it protects the gpu_vm_bo's list of + // gpu_vmas, but since local gem objects share the gpu_vm's + // dma_resv, it is already held at this point. + for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma) + move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list); + } + + for_each_gpu_vma_on_rebind_list(&gpu vm->rebind_list, &gpu_vma) { + rebind_gpu_vma(&gpu_vma); + remove_gpu_vma_from_rebind_list(&gpu_vma); + } + // Validation section ends here, and job submission starts. + + add_dependencies(&gpu_job, &gpu_vm->resv); + job_dma_fence = gpu_submit(&gpu_job)); + + add_dma_fence(job_dma_fence, &gpu_vm->resv); + dma_resv_unlock(gpu_vm->resv); + +The reason for having a separate gpu_vm rebind list is that there +might be userptr gpu_vmas that are not mapping a buffer object that +also need rebinding. + +Eviction +________ + +Eviction of one of these local objects will then look similar to the +following: + +.. code-block:: C + + obj = get_object_from_lru(); + + dma_resv_lock(obj->resv); + for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo); + add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list); + + add_dependencies(&eviction_job, &obj->resv); + job_dma_fence = gpu_submit(&eviction_job); + add_dma_fence(&obj->resv, job_dma_fence); + + dma_resv_unlock(&obj->resv); + put_object(obj); + +Note that since the object is local to the gpu_vm, it will share the gpu_vm's +dma_resv lock such that ``obj->resv == gpu_vm->resv``. +The gpu_vm_bos marked for eviction are put on the gpu_vm's evict list, +which is protected by ``gpu_vm->resv``. During eviction all local +objects have their dma_resv locked and, due to the above equality, also +the gpu_vm's dma_resv protecting the gpu_vm's evict list is locked. + +With VM_BIND, gpu_vmas don't need to be unbound before eviction, +since the driver must ensure that the eviction blit or copy will wait +for GPU idle or depend on all previous GPU activity. Furthermore, any +subsequent attempt by the GPU to access freed memory through the +gpu_vma will be preceded by a new exec function, with a revalidation +section which will make sure all gpu_vmas are rebound. The eviction +code holding the object's dma_resv while revalidating will ensure a +new exec function may not race with the eviction. + +A driver can be implemented in such a way that, on each exec function, +only a subset of vmas are selected for rebind. In this case, all vmas that are +*not* selected for rebind must be unbound before the exec +function workload is submitted. + +Locking with external buffer objects +==================================== + +Since external buffer objects may be shared by multiple gpu_vm's they +can't share their reservation object with a single gpu_vm. Instead +they need to have a reservation object of their own. The external +objects bound to a gpu_vm using one or many gpu_vmas are therefore put on a +per-gpu_vm list which is protected by the gpu_vm's dma_resv lock or +one of the :ref:`gpu_vm list spinlocks `. Once +the gpu_vm's reservation object is locked, it is safe to traverse the +external object list and lock the dma_resvs of all external +objects. However, if instead a list spinlock is used, a more elaborate +iteration scheme needs to be used. + +At eviction time, the gpu_vm_bos of *all* the gpu_vms an external +object is bound to need to be put on their gpu_vm's evict list. +However, when evicting an external object, the dma_resvs of the +gpu_vms the object is bound to are typically not held. Only +the object's private dma_resv can be guaranteed to be held. If there +is a ww_acquire context at hand at eviction time we could grab those +dma_resvs but that could cause expensive ww_mutex rollbacks. A simple +option is to just mark the gpu_vm_bos of the evicted gem object with +an ``evicted`` bool that is inspected before the next time the +corresponding gpu_vm evicted list needs to be traversed. For example, when +traversing the list of external objects and locking them. At that time, +both the gpu_vm's dma_resv and the object's dma_resv is held, and the +gpu_vm_bo marked evicted, can then be added to the gpu_vm's list of +evicted gpu_vm_bos. The ``evicted`` bool is formally protected by the +object's dma_resv. + +The exec function becomes + +.. code-block:: C + + dma_resv_lock(gpu_vm->resv); + + // External object list is protected by the gpu_vm->resv lock. + for_each_gpu_vm_bo_on_extobj_list(gpu_vm, &gpu_vm_bo) { + dma_resv_lock(gpu_vm_bo.gem_obj->resv); + if (gpu_vm_bo_marked_evicted(&gpu_vm_bo)) + add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list); + } + + for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) { + validate_gem_bo(&gpu_vm_bo->gem_bo); + + for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma) + move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list); + } + + for_each_gpu_vma_on_rebind_list(&gpu vm->rebind_list, &gpu_vma) { + rebind_gpu_vma(&gpu_vma); + remove_gpu_vma_from_rebind_list(&gpu_vma); + } + + add_dependencies(&gpu_job, &gpu_vm->resv); + job_dma_fence = gpu_submit(&gpu_job)); + + add_dma_fence(job_dma_fence, &gpu_vm->resv); + for_each_external_obj(gpu_vm, &obj) + add_dma_fence(job_dma_fence, &obj->resv); + dma_resv_unlock_all_resv_locks(); + +And the corresponding shared-object aware eviction would look like: + +.. code-block:: C + + obj = get_object_from_lru(); + + dma_resv_lock(obj->resv); + for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo) + if (object_is_vm_local(obj)) + add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list); + else + mark_gpu_vm_bo_evicted(&gpu_vm_bo); + + add_dependencies(&eviction_job, &obj->resv); + job_dma_fence = gpu_submit(&eviction_job); + add_dma_fence(&obj->resv, job_dma_fence); + + dma_resv_unlock(&obj->resv); + put_object(obj); + +.. _Spinlock iteration: + +Accessing the gpu_vm's lists without the dma_resv lock held +=========================================================== + +Some drivers will hold the gpu_vm's dma_resv lock when accessing the +gpu_vm's evict list and external objects lists. However, there are +drivers that need to access these lists without the dma_resv lock +held, for example due to asynchronous state updates from within the +dma_fence signalling critical path. In such cases, a spinlock can be +used to protect manipulation of the lists. However, since higher level +sleeping locks need to be taken for each list item while iterating +over the lists, the items already iterated over need to be +temporarily moved to a private list and the spinlock released +while processing each item: + +.. code block:: C + + struct list_head still_in_list; + + INIT_LIST_HEAD(&still_in_list); + + spin_lock(&gpu_vm->list_lock); + do { + struct list_head *entry = list_first_entry_or_null(&gpu_vm->list, head); + + if (!entry) + break; + + list_move_tail(&entry->head, &still_in_list); + list_entry_get_unless_zero(entry); + spin_unlock(&gpu_vm->list_lock); + + process(entry); + + spin_lock(&gpu_vm->list_lock); + list_entry_put(entry); + } while (true); + + list_splice_tail(&still_in_list, &gpu_vm->list); + spin_unlock(&gpu_vm->list_lock); + +Due to the additional locking and atomic operations, drivers that *can* +avoid accessing the gpu_vm's list outside of the dma_resv lock +might want to avoid also this iteration scheme. Particularly, if the +driver anticipates a large number of list items. For lists where the +anticipated number of list items is small, where list iteration doesn't +happen very often or if there is a significant additional cost +associated with each iteration, the atomic operation overhead +associated with this type of iteration is, most likely, negligible. Note that +if this scheme is used, it is necessary to make sure this list +iteration is protected by an outer level lock or semaphore, since list +items are temporarily pulled off the list while iterating, and it is +also worth mentioning that the local list ``still_in_list`` should +also be considered protected by the ``gpu_vm->list_lock``, and it is +thus possible that items can be removed also from the local list +concurrently with list iteration. + +Please refer to the :ref:`DRM GPUVM locking section +` and its internal +:c:func:`get_next_vm_bo_from_list` function. + + +userptr gpu_vmas +================ + +A userptr gpu_vma is a gpu_vma that, instead of mapping a buffer object to a +GPU virtual address range, directly maps a CPU mm range of anonymous- +or file page-cache pages. +A very simple approach would be to just pin the pages using +pin_user_pages() at bind time and unpin them at unbind time, but this +creates a Denial-Of-Service vector since a single user-space process +would be able to pin down all of system memory, which is not +desirable. (For special use-cases and assuming proper accounting pinning might +still be a desirable feature, though). What we need to do in the +general case is to obtain a reference to the desired pages, make sure +we are notified using a MMU notifier just before the CPU mm unmaps the +pages, dirty them if they are not mapped read-only to the GPU, and +then drop the reference. +When we are notified by the MMU notifier that CPU mm is about to drop the +pages, we need to stop GPU access to the pages by waiting for VM idle +in the MMU notifier and make sure that before the next time the GPU +tries to access whatever is now present in the CPU mm range, we unmap +the old pages from the GPU page tables and repeat the process of +obtaining new page references. (See the :ref:`notifier example +` below). Note that when the core mm decides to +laundry pages, we get such an unmap MMU notification and can mark the +pages dirty again before the next GPU access. We also get similar MMU +notifications for NUMA accounting which the GPU driver doesn't really +need to care about, but so far it has proven difficult to exclude +certain notifications. + +Using a MMU notifier for device DMA (and other methods) is described in +:ref:`the pin_user_pages() documentation `. + +Now, the method of obtaining struct page references using +get_user_pages() unfortunately can't be used under a dma_resv lock +since that would violate the locking order of the dma_resv lock vs the +mmap_lock that is grabbed when resolving a CPU pagefault. This means +the gpu_vm's list of userptr gpu_vmas needs to be protected by an +outer lock, which in our example below is the ``gpu_vm->lock``. + +The MMU interval seqlock for a userptr gpu_vma is used in the following +way: + +.. code-block:: C + + // Exclusive locking mode here is strictly needed only if there are + // invalidated userptr gpu_vmas present, to avoid concurrent userptr + // revalidations of the same userptr gpu_vma. + down_write(&gpu_vm->lock); + retry: + + // Note: mmu_interval_read_begin() blocks until there is no + // invalidation notifier running anymore. + seq = mmu_interval_read_begin(&gpu_vma->userptr_interval); + if (seq != gpu_vma->saved_seq) { + obtain_new_page_pointers(&gpu_vma); + dma_resv_lock(&gpu_vm->resv); + add_gpu_vma_to_revalidate_list(&gpu_vma, &gpu_vm); + dma_resv_unlock(&gpu_vm->resv); + gpu_vma->saved_seq = seq; + } + + // The usual revalidation goes here. + + // Final userptr sequence validation may not happen before the + // submission dma_fence is added to the gpu_vm's resv, from the POW + // of the MMU invalidation notifier. Hence the + // userptr_notifier_lock that will make them appear atomic. + + add_dependencies(&gpu_job, &gpu_vm->resv); + down_read(&gpu_vm->userptr_notifier_lock); + if (mmu_interval_read_retry(&gpu_vma->userptr_interval, gpu_vma->saved_seq)) { + up_read(&gpu_vm->userptr_notifier_lock); + goto retry; + } + + job_dma_fence = gpu_submit(&gpu_job)); + + add_dma_fence(job_dma_fence, &gpu_vm->resv); + + for_each_external_obj(gpu_vm, &obj) + add_dma_fence(job_dma_fence, &obj->resv); + + dma_resv_unlock_all_resv_locks(); + up_read(&gpu_vm->userptr_notifier_lock); + up_write(&gpu_vm->lock); + +The code between ``mmu_interval_read_begin()`` and the +``mmu_interval_read_retry()`` marks the read side critical section of +what we call the ``userptr_seqlock``. In reality, the gpu_vm's userptr +gpu_vma list is looped through, and the check is done for *all* of its +userptr gpu_vmas, although we only show a single one here. + +The userptr gpu_vma MMU invalidation notifier might be called from +reclaim context and, again, to avoid locking order violations, we can't +take any dma_resv lock nor the gpu_vm->lock from within it. + +.. _Invalidation example: +.. code-block:: C + + bool gpu_vma_userptr_invalidate(userptr_interval, cur_seq) + { + // Make sure the exec function either sees the new sequence + // and backs off or we wait for the dma-fence: + + down_write(&gpu_vm->userptr_notifier_lock); + mmu_interval_set_seq(userptr_interval, cur_seq); + up_write(&gpu_vm->userptr_notifier_lock); + + // At this point, the exec function can't succeed in + // submitting a new job, because cur_seq is an invalid + // sequence number and will always cause a retry. When all + // invalidation callbacks, the mmu notifier core will flip + // the sequence number to a valid one. However we need to + // stop gpu access to the old pages here. + + dma_resv_wait_timeout(&gpu_vm->resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + return true; + } + +When this invalidation notifier returns, the GPU can no longer be +accessing the old pages of the userptr gpu_vma and needs to redo the +page-binding before a new GPU submission can succeed. + +Efficient userptr gpu_vma exec_function iteration +_________________________________________________ + +If the gpu_vm's list of userptr gpu_vmas becomes large, it's +inefficient to iterate through the complete lists of userptrs on each +exec function to check whether each userptr gpu_vma's saved +sequence number is stale. A solution to this is to put all +*invalidated* userptr gpu_vmas on a separate gpu_vm list and +only check the gpu_vmas present on this list on each exec +function. This list will then lend itself very-well to the spinlock +locking scheme that is +:ref:`described in the spinlock iteration section `, since +in the mmu notifier, where we add the invalidated gpu_vmas to the +list, it's not possible to take any outer locks like the +``gpu_vm->lock`` or the ``gpu_vm->resv`` lock. Note that the +``gpu_vm->lock`` still needs to be taken while iterating to ensure the list is +complete, as also mentioned in that section. + +If using an invalidated userptr list like this, the retry check in the +exec function trivially becomes a check for invalidated list empty. + +Locking at bind and unbind time +=============================== + +At bind time, assuming a GEM object backed gpu_vma, each +gpu_vma needs to be associated with a gpu_vm_bo and that +gpu_vm_bo in turn needs to be added to the GEM object's +gpu_vm_bo list, and possibly to the gpu_vm's external object +list. This is referred to as *linking* the gpu_vma, and typically +requires that the ``gpu_vm->lock`` and the ``gem_object->gpuva_lock`` +are held. When unlinking a gpu_vma the same locks should be held, +and that ensures that when iterating over ``gpu_vmas`, either under +the ``gpu_vm->resv`` or the GEM object's dma_resv, that the gpu_vmas +stay alive as long as the lock under which we iterate is not released. For +userptr gpu_vmas it's similarly required that during vma destroy, the +outer ``gpu_vm->lock`` is held, since otherwise when iterating over +the invalidated userptr list as described in the previous section, +there is nothing keeping those userptr gpu_vmas alive. + +Locking for recoverable page-fault page-table updates +===================================================== + +There are two important things we need to ensure with locking for +recoverable page-faults: + +* At the time we return pages back to the system / allocator for + reuse, there should be no remaining GPU mappings and any GPU TLB + must have been flushed. +* The unmapping and mapping of a gpu_vma must not race. + +Since the unmapping (or zapping) of GPU ptes is typically taking place +where it is hard or even impossible to take any outer level locks we +must either introduce a new lock that is held at both mapping and +unmapping time, or look at the locks we do hold at unmapping time and +make sure that they are held also at mapping time. For userptr +gpu_vmas, the ``userptr_seqlock`` is held in write mode in the mmu +invalidation notifier where zapping happens. Hence, if the +``userptr_seqlock`` as well as the ``gpu_vm->userptr_notifier_lock`` +is held in read mode during mapping, it will not race with the +zapping. For GEM object backed gpu_vmas, zapping will take place under +the GEM object's dma_resv and ensuring that the dma_resv is held also +when populating the page-tables for any gpu_vma pointing to the GEM +object, will similarly ensure we are race-free. + +If any part of the mapping is performed asynchronously +under a dma-fence with these locks released, the zapping will need to +wait for that dma-fence to signal under the relevant lock before +starting to modify the page-table. + +Since modifying the +page-table structure in a way that frees up page-table memory +might also require outer level locks, the zapping of GPU ptes +typically focuses only on zeroing page-table or page-directory entries +and flushing TLB, whereas freeing of page-table memory is deferred to +unbind or rebind time. diff --git a/Documentation/gpu/imagination/index.rst b/Documentation/gpu/imagination/index.rst new file mode 100644 index 000000000..0c1e247ce --- /dev/null +++ b/Documentation/gpu/imagination/index.rst @@ -0,0 +1,13 @@ +======================================= +drm/imagination PowerVR Graphics Driver +======================================= + +.. kernel-doc:: drivers/gpu/drm/imagination/pvr_drv.c + :doc: PowerVR (Series 6 and later) and IMG Graphics Driver + +Contents +======== +.. toctree:: + :maxdepth: 2 + + uapi diff --git a/Documentation/gpu/imagination/uapi.rst b/Documentation/gpu/imagination/uapi.rst new file mode 100644 index 000000000..7502413d0 --- /dev/null +++ b/Documentation/gpu/imagination/uapi.rst @@ -0,0 +1,171 @@ +==== +UAPI +==== +The sources associated with this section can be found in ``pvr_drm.h``. + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR UAPI + +OBJECT ARRAYS +============= +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_obj_array + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: DRM_PVR_OBJ_ARRAY + +IOCTLS +====== +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL interface + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: PVR_IOCTL + +DEV_QUERY +--------- +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL DEV_QUERY interface + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_dev_query + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_dev_query_args + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_dev_query_gpu_info + drm_pvr_dev_query_runtime_info + drm_pvr_dev_query_hwrt_info + drm_pvr_dev_query_quirks + drm_pvr_dev_query_enhancements + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_heap_id + drm_pvr_heap + drm_pvr_dev_query_heap_info + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_static_data_area_usage + drm_pvr_static_data_area + drm_pvr_dev_query_static_data_areas + +CREATE_BO +--------- +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL CREATE_BO interface + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_create_bo_args + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: Flags for CREATE_BO + +GET_BO_MMAP_OFFSET +------------------ +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL GET_BO_MMAP_OFFSET interface + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_get_bo_mmap_offset_args + +CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT +---------------------------------------- +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT interfaces + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_create_vm_context_args + drm_pvr_ioctl_destroy_vm_context_args + +VM_MAP and VM_UNMAP +------------------- +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL VM_MAP and VM_UNMAP interfaces + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_vm_map_args + drm_pvr_ioctl_vm_unmap_args + +CREATE_CONTEXT and DESTROY_CONTEXT +---------------------------------- +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL CREATE_CONTEXT and DESTROY_CONTEXT interfaces + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_create_context_args + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ctx_priority + drm_pvr_ctx_type + drm_pvr_static_render_context_state + drm_pvr_static_render_context_state_format + drm_pvr_reset_framework + drm_pvr_reset_framework_format + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_destroy_context_args + +CREATE_FREE_LIST and DESTROY_FREE_LIST +-------------------------------------- +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL CREATE_FREE_LIST and DESTROY_FREE_LIST interfaces + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_create_free_list_args + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_destroy_free_list_args + +CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET +-------------------------------------------- +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET interfaces + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_create_hwrt_dataset_args + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_create_hwrt_geom_data_args + drm_pvr_create_hwrt_rt_data_args + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_destroy_hwrt_dataset_args + +SUBMIT_JOBS +----------- +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: PowerVR IOCTL SUBMIT_JOBS interface + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: Flags for the drm_pvr_sync_op object. + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_ioctl_submit_jobs_args + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: Flags for SUBMIT_JOB ioctl geometry command. + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: Flags for SUBMIT_JOB ioctl fragment command. + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: Flags for SUBMIT_JOB ioctl compute command. + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :doc: Flags for SUBMIT_JOB ioctl transfer command. + +.. kernel-doc:: include/uapi/drm/pvr_drm.h + :identifiers: drm_pvr_sync_op + drm_pvr_job_type + drm_pvr_hwrt_data_ref + drm_pvr_job + +Internal notes +============== +.. kernel-doc:: drivers/gpu/drm/imagination/pvr_device.h + :doc: IOCTL validation helpers + +.. kernel-doc:: drivers/gpu/drm/imagination/pvr_device.h + :identifiers: PVR_STATIC_ASSERT_64BIT_ALIGNED PVR_IOCTL_UNION_PADDING_CHECK + pvr_ioctl_union_padding_check diff --git a/Documentation/gpu/implementation_guidelines.rst b/Documentation/gpu/implementation_guidelines.rst index 138e637dc..dbccfa72f 100644 --- a/Documentation/gpu/implementation_guidelines.rst +++ b/Documentation/gpu/implementation_guidelines.rst @@ -7,3 +7,4 @@ Misc DRM driver uAPI- and feature implementation guidelines .. toctree:: drm-vm-bind-async + drm-vm-bind-locking diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst index c29113a0a..97cf87578 100644 --- a/Documentation/gpu/rfc/xe.rst +++ b/Documentation/gpu/rfc/xe.rst @@ -70,35 +70,42 @@ When the time comes for Xe, the protection will be lifted on Xe and kept in i915 Xe – Pre-Merge Goals - Work-in-Progress ======================================= -Drm_scheduler -------------- -Xe primarily uses Firmware based scheduling (GuC FW). However, it will use -drm_scheduler as the scheduler ‘frontend’ for userspace submission in order to -resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is -not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and -drm_sched_entity. +Display integration with i915 +----------------------------- +In order to share the display code with the i915 driver so that there is maximum +reuse, the i915/display/ code is built twice, once for i915.ko and then for +xe.ko. Currently, the i915/display code in Xe tree is polluted with many 'ifdefs' +depending on the build target. The goal is to refactor both Xe and i915/display +code simultaneously in order to get a clean result before they land upstream, so +that display can already be part of the initial pull request towards drm-next. -Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but -some consensus needs to be reached between Xe and other community drivers that -could also benefit from this work, for coupling FW based/assisted submission such -as the ARM’s new Mali GPU driver, and others. +However, display code should not gate the acceptance of Xe in upstream. Xe +patches will be refactored in a way that display code can be removed, if needed, +from the first pull request of Xe towards drm-next. The expectation is that when +both drivers are part of the drm-tip, the introduction of cleaner patches will be +easier and speed up. -As a key measurable result, the patch series introducing Xe itself shall not -depend on any other patch touching drm_scheduler itself that was not yet merged -through drm-misc. This, by itself, already includes the reach of an agreement for -uniform 1 to 1 relationship implementation / usage across drivers. +Xe – uAPI high level overview +============================= -ASYNC VM_BIND -------------- -Although having a common DRM level IOCTL for VM_BIND is not a requirement to get -Xe merged, it is mandatory to have a consensus with other drivers and Mesa. -It needs to be clear how to handle async VM_BIND and interactions with userspace -memory fences. Ideally with helper support so people don't get it wrong in all -possible ways. +...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached. -As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of -various flavors, error handling and sample API suggestions are documented in -:doc:`The ASYNC VM_BIND document `. +Xe – Pre-Merge Goals - Completed +================================ + +Drm_exec +-------- +Helper to make dma_resv locking for a big number of buffers is getting removed in +the drm_exec series proposed in https://patchwork.freedesktop.org/patch/524376/ +If that happens, Xe needs to change and incorporate the changes in the driver. +The goal is to engage with the Community to understand if the best approach is to +move that to the drivers that are using it or if we should keep the helpers in +place waiting for Xe to get merged. + +This item ties into the GPUVA, VM_BIND, and even long-running compute support. + +As a key measurable result, we need to have a community consensus documented in +this document and the Xe driver prepared for the changes, if necessary. Userptr integration and vm_bind ------------------------------- @@ -123,10 +130,45 @@ Documentation should include: * O(1) complexity under VM_BIND. +The document is now included in the drm documentation :doc:`here `. + Some parts of userptr like mmu_notifiers should become GPUVA or DRM helpers when the second driver supporting VM_BIND+userptr appears. Details to be defined when the time comes. +The DRM GPUVM helpers do not yet include the userptr parts, but discussions +about implementing them are ongoing. + +ASYNC VM_BIND +------------- +Although having a common DRM level IOCTL for VM_BIND is not a requirement to get +Xe merged, it is mandatory to have a consensus with other drivers and Mesa. +It needs to be clear how to handle async VM_BIND and interactions with userspace +memory fences. Ideally with helper support so people don't get it wrong in all +possible ways. + +As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of +various flavors, error handling and sample API suggestions are documented in +:doc:`The ASYNC VM_BIND document `. + +Drm_scheduler +------------- +Xe primarily uses Firmware based scheduling (GuC FW). However, it will use +drm_scheduler as the scheduler ‘frontend’ for userspace submission in order to +resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is +not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and +drm_sched_entity. + +Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but +some consensus needs to be reached between Xe and other community drivers that +could also benefit from this work, for coupling FW based/assisted submission such +as the ARM’s new Mali GPU driver, and others. + +As a key measurable result, the patch series introducing Xe itself shall not +depend on any other patch touching drm_scheduler itself that was not yet merged +through drm-misc. This, by itself, already includes the reach of an agreement for +uniform 1 to 1 relationship implementation / usage across drivers. + Long running compute: minimal data structure/scaffolding -------------------------------------------------------- The generic scheduler code needs to include the handling of endless compute @@ -139,46 +181,6 @@ this minimal drm/scheduler work, if needed, merged to drm-misc in a way that any drm driver, including Xe, could re-use and add their own individual needs on top in a next stage. However, this should not block the initial merge. -This is a non-blocker item since the driver without the support for the long -running compute enabled is not a showstopper. - -Display integration with i915 ------------------------------ -In order to share the display code with the i915 driver so that there is maximum -reuse, the i915/display/ code is built twice, once for i915.ko and then for -xe.ko. Currently, the i915/display code in Xe tree is polluted with many 'ifdefs' -depending on the build target. The goal is to refactor both Xe and i915/display -code simultaneously in order to get a clean result before they land upstream, so -that display can already be part of the initial pull request towards drm-next. - -However, display code should not gate the acceptance of Xe in upstream. Xe -patches will be refactored in a way that display code can be removed, if needed, -from the first pull request of Xe towards drm-next. The expectation is that when -both drivers are part of the drm-tip, the introduction of cleaner patches will be -easier and speed up. - -Drm_exec --------- -Helper to make dma_resv locking for a big number of buffers is getting removed in -the drm_exec series proposed in https://patchwork.freedesktop.org/patch/524376/ -If that happens, Xe needs to change and incorporate the changes in the driver. -The goal is to engage with the Community to understand if the best approach is to -move that to the drivers that are using it or if we should keep the helpers in -place waiting for Xe to get merged. - -This item ties into the GPUVA, VM_BIND, and even long-running compute support. - -As a key measurable result, we need to have a community consensus documented in -this document and the Xe driver prepared for the changes, if necessary. - -Xe – uAPI high level overview -============================= - -...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached. - -Xe – Pre-Merge Goals - Completed -================================ - Dev_coredump ------------ diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 85bbe0543..41a264bf8 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -620,6 +620,23 @@ Contact: Javier Martinez Canillas Level: Intermediate +Clean up and document former selftests suites +--------------------------------------------- + +Some KUnit test suites (drm_buddy, drm_cmdline_parser, drm_damage_helper, +drm_format, drm_framebuffer, drm_dp_mst_helper, drm_mm, drm_plane_helper and +drm_rect) are former selftests suites that have been converted over when KUnit +was first introduced. + +These suites were fairly undocumented, and with different goals than what unit +tests can be. Trying to identify what each test in these suites actually test +for, whether that makes sense for a unit test, and either remove it if it +doesn't or document it if it does would be of great help. + +Contact: Maxime Ripard + +Level: Intermediate + Enable trinity for DRM ---------------------- @@ -764,6 +781,29 @@ Contact: Hans de Goede Level: Advanced +Buffer age or other damage accumulation algorithm for buffer damage +=================================================================== + +Drivers that do per-buffer uploads, need a buffer damage handling (rather than +frame damage like drivers that do per-plane or per-CRTC uploads), but there is +no support to get the buffer age or any other damage accumulation algorithm. + +For this reason, the damage helpers just fallback to a full plane update if the +framebuffer attached to a plane has changed since the last page-flip. Drivers +set &drm_plane_state.ignore_damage_clips to true as indication to +drm_atomic_helper_damage_iter_init() and drm_atomic_helper_damage_iter_next() +helpers that the damage clips should be ignored. + +This should be improved to get damage tracking properly working on drivers that +do per-buffer uploads. + +More information about damage tracking and references to learning materials can +be found in :ref:`damage_tracking_properties`. + +Contact: Javier Martinez Canillas + +Level: Advanced + Outside DRM =========== diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst new file mode 100644 index 000000000..c224ecaee --- /dev/null +++ b/Documentation/gpu/xe/index.rst @@ -0,0 +1,25 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======================= +drm/xe Intel GFX Driver +======================= + +The drm/xe driver supports some future GFX cards with rendering, display, +compute and media. Support for currently available platforms like TGL, ADL, +DG2, etc is provided to prototype the driver. + +.. toctree:: + :titlesonly: + + xe_mm + xe_map + xe_migrate + xe_cs + xe_pm + xe_pcode + xe_gt_mcr + xe_wa + xe_rtp + xe_firmware + xe_tile + xe_debugging diff --git a/Documentation/gpu/xe/xe_cs.rst b/Documentation/gpu/xe/xe_cs.rst new file mode 100644 index 000000000..e379aed4f --- /dev/null +++ b/Documentation/gpu/xe/xe_cs.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================== +Command submission +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c + :doc: Execbuf (User GPU command submission) diff --git a/Documentation/gpu/xe/xe_debugging.rst b/Documentation/gpu/xe/xe_debugging.rst new file mode 100644 index 000000000..d65e56ff3 --- /dev/null +++ b/Documentation/gpu/xe/xe_debugging.rst @@ -0,0 +1,7 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========= +Debugging +========= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_assert.h diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst new file mode 100644 index 000000000..afcb561cd --- /dev/null +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -0,0 +1,37 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======== +Firmware +======== + +Firmware Layout +=============== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h + :doc: CSS-based Firmware Layout + +.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h + :doc: GSC-based Firmware Layout + +Write Once Protected Content Memory (WOPCM) Layout +================================================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c + :doc: Write Once Protected Content Memory (WOPCM) Layout + +GuC CTB Blob +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c + :doc: GuC CTB Blob + +GuC Power Conservation (PC) +=========================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c + :doc: GuC Power Conservation (PC) + +Internal API +============ + +TODO diff --git a/Documentation/gpu/xe/xe_gt_mcr.rst b/Documentation/gpu/xe/xe_gt_mcr.rst new file mode 100644 index 000000000..848c07bc3 --- /dev/null +++ b/Documentation/gpu/xe/xe_gt_mcr.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +============================================== +GT Multicast/Replicated (MCR) Register Support +============================================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c + :doc: GT Multicast/Replicated (MCR) Register Support + +Internal API +============ + +TODO diff --git a/Documentation/gpu/xe/xe_map.rst b/Documentation/gpu/xe/xe_map.rst new file mode 100644 index 000000000..a098cfd2d --- /dev/null +++ b/Documentation/gpu/xe/xe_map.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========= +Map Layer +========= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h + :doc: Map layer diff --git a/Documentation/gpu/xe/xe_migrate.rst b/Documentation/gpu/xe/xe_migrate.rst new file mode 100644 index 000000000..f92faec0a --- /dev/null +++ b/Documentation/gpu/xe/xe_migrate.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +============= +Migrate Layer +============= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h + :doc: Migrate Layer diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst new file mode 100644 index 000000000..6c8fd8b4a --- /dev/null +++ b/Documentation/gpu/xe/xe_mm.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================= +Memory Management +================= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h + :doc: Buffer Objects (BO) + +Pagetable building +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pt.c + :doc: Pagetable building diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst new file mode 100644 index 000000000..d2e22cc45 --- /dev/null +++ b/Documentation/gpu/xe/xe_pcode.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +===== +Pcode +===== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c + :doc: PCODE + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c + :internal: diff --git a/Documentation/gpu/xe/xe_pm.rst b/Documentation/gpu/xe/xe_pm.rst new file mode 100644 index 000000000..6781cdfb2 --- /dev/null +++ b/Documentation/gpu/xe/xe_pm.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======================== +Runtime Power Management +======================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c + :doc: Xe Power Management + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c + :internal: diff --git a/Documentation/gpu/xe/xe_rtp.rst b/Documentation/gpu/xe/xe_rtp.rst new file mode 100644 index 000000000..7fdf4b6c1 --- /dev/null +++ b/Documentation/gpu/xe/xe_rtp.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========================= +Register Table Processing +========================= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c + :doc: Register Table Processing + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c + :internal: diff --git a/Documentation/gpu/xe/xe_tile.rst b/Documentation/gpu/xe/xe_tile.rst new file mode 100644 index 000000000..c33f68dd9 --- /dev/null +++ b/Documentation/gpu/xe/xe_tile.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================== +Multi-tile Devices +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c + :doc: Multi-tile Design + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c + :internal: diff --git a/Documentation/gpu/xe/xe_wa.rst b/Documentation/gpu/xe/xe_wa.rst new file mode 100644 index 000000000..f8811cc6a --- /dev/null +++ b/Documentation/gpu/xe/xe_wa.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +==================== +Hardware workarounds +==================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c + :doc: Hardware workarounds + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c + :internal: -- cgit v1.2.3