Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r--  drivers/gpu/drm/vc4/Kconfig | 29
-rw-r--r--  drivers/gpu/drm/vc4/Makefile | 29
-rw-r--r--  drivers/gpu/drm/vc4/vc4_bo.c | 1107
-rw-r--r--  drivers/gpu/drm/vc4/vc4_crtc.c | 1254
-rw-r--r--  drivers/gpu/drm/vc4/vc4_debugfs.c | 40
-rw-r--r--  drivers/gpu/drm/vc4/vc4_dpi.c | 402
-rw-r--r--  drivers/gpu/drm/vc4/vc4_drv.c | 413
-rw-r--r--  drivers/gpu/drm/vc4/vc4_drv.h | 822
-rw-r--r--  drivers/gpu/drm/vc4/vc4_dsi.c | 1744
-rw-r--r--  drivers/gpu/drm/vc4/vc4_fence.c | 48
-rw-r--r--  drivers/gpu/drm/vc4/vc4_gem.c | 1377
-rw-r--r--  drivers/gpu/drm/vc4/vc4_hdmi.c | 1520
-rw-r--r--  drivers/gpu/drm/vc4/vc4_hvs.c | 277
-rw-r--r--  drivers/gpu/drm/vc4/vc4_irq.c | 290
-rw-r--r--  drivers/gpu/drm/vc4/vc4_kms.c | 449
-rw-r--r--  drivers/gpu/drm/vc4/vc4_packet.h | 399
-rw-r--r--  drivers/gpu/drm/vc4/vc4_perfmon.c | 188
-rw-r--r--  drivers/gpu/drm/vc4/vc4_plane.c | 1015
-rw-r--r--  drivers/gpu/drm/vc4/vc4_qpu_defines.h | 279
-rw-r--r--  drivers/gpu/drm/vc4/vc4_regs.h | 1051
-rw-r--r--  drivers/gpu/drm/vc4/vc4_render_cl.c | 661
-rw-r--r--  drivers/gpu/drm/vc4/vc4_trace.h | 63
-rw-r--r--  drivers/gpu/drm/vc4/vc4_trace_points.c | 14
-rw-r--r--  drivers/gpu/drm/vc4/vc4_txp.c | 483
-rw-r--r--  drivers/gpu/drm/vc4/vc4_v3d.c | 470
-rw-r--r--  drivers/gpu/drm/vc4/vc4_validate.c | 939
-rw-r--r--  drivers/gpu/drm/vc4/vc4_validate_shaders.c | 950
-rw-r--r--  drivers/gpu/drm/vc4/vc4_vec.c | 661
28 files changed, 16974 insertions, 0 deletions
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig
new file mode 100644
index 000000000..fdae18aea
--- /dev/null
+++ b/drivers/gpu/drm/vc4/Kconfig
@@ -0,0 +1,29 @@
+config DRM_VC4
+ tristate "Broadcom VC4 Graphics"
+ depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST
+ depends on DRM
+ depends on SND && SND_SOC
+ depends on COMMON_CLK
+ select DRM_KMS_HELPER
+ select DRM_KMS_CMA_HELPER
+ select DRM_GEM_CMA_HELPER
+ select DRM_PANEL_BRIDGE
+ select SND_PCM
+ select SND_PCM_ELD
+ select SND_SOC_GENERIC_DMAENGINE_PCM
+ select DRM_MIPI_DSI
+ help
+ Choose this option if you have a system that has a Broadcom
+ VC4 GPU, such as the Raspberry Pi or other BCM2708/BCM2835.
+
+ This driver requires that "avoid_warnings=2" be present in
+ the config.txt for the firmware, to keep it from smashing
+ our display setup.
+
+config DRM_VC4_HDMI_CEC
+ bool "Broadcom VC4 HDMI CEC Support"
+ depends on DRM_VC4
+ select CEC_CORE
+ help
+ Choose this option if you have a Broadcom VC4 GPU
+ and want to use CEC.
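
For reference, a kernel configuration fragment satisfying the dependencies above could look like the sketch below (not part of this patch; the BCM2835 platform symbol is an assumption, and the selected helpers such as DRM_KMS_HELPER, the CMA helpers, SND_PCM_ELD, DRM_MIPI_DSI and CEC_CORE are pulled in automatically by the select statements):

# Example .config fragment, assuming a BCM2835-based platform
CONFIG_ARCH_BCM2835=y
CONFIG_COMMON_CLK=y
CONFIG_DRM=y
CONFIG_SND=y
CONFIG_SND_SOC=y
CONFIG_DRM_VC4=m
CONFIG_DRM_VC4_HDMI_CEC=y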
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
new file mode 100644
index 000000000..b303703bc
--- /dev/null
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+# Please keep these build lists sorted!
+
+# core driver code
+vc4-y := \
+ vc4_bo.o \
+ vc4_crtc.o \
+ vc4_drv.o \
+ vc4_dpi.o \
+ vc4_dsi.o \
+ vc4_fence.o \
+ vc4_kms.o \
+ vc4_gem.o \
+ vc4_hdmi.o \
+ vc4_vec.o \
+ vc4_hvs.o \
+ vc4_irq.o \
+ vc4_perfmon.o \
+ vc4_plane.o \
+ vc4_render_cl.o \
+ vc4_trace_points.o \
+ vc4_txp.o \
+ vc4_v3d.o \
+ vc4_validate.o \
+ vc4_validate_shaders.o
+
+vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
+
+obj-$(CONFIG_DRM_VC4) += vc4.o
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
new file mode 100644
index 000000000..1e28ff981
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -0,0 +1,1107 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**
+ * DOC: VC4 GEM BO management support
+ *
+ * The VC4 GPU architecture (both scanout and rendering) has direct
+ * access to system memory with no MMU in between. To support it, we
+ * use the GEM CMA helper functions to allocate contiguous ranges of
+ * physical memory for our BOs.
+ *
+ * Since the CMA allocator is very slow, we keep a cache of recently
+ * freed BOs around so that the kernel's allocation of objects for 3D
+ * rendering can return quickly.
+ */
+
+#include <linux/dma-buf.h>
+
+#include "vc4_drv.h"
+#include "uapi/drm/vc4_drm.h"
+
+static const char * const bo_type_names[] = {
+ "kernel",
+ "V3D",
+ "V3D shader",
+ "dumb",
+ "binner",
+ "RCL",
+ "BCL",
+ "kernel BO cache",
+};
+
+static bool is_user_label(int label)
+{
+ return label >= VC4_BO_TYPE_COUNT;
+}
+
+static void vc4_bo_stats_dump(struct vc4_dev *vc4)
+{
+ int i;
+
+ for (i = 0; i < vc4->num_labels; i++) {
+ if (!vc4->bo_labels[i].num_allocated)
+ continue;
+
+ DRM_INFO("%30s: %6dkb BOs (%d)\n",
+ vc4->bo_labels[i].name,
+ vc4->bo_labels[i].size_allocated / 1024,
+ vc4->bo_labels[i].num_allocated);
+ }
+
+ mutex_lock(&vc4->purgeable.lock);
+ if (vc4->purgeable.num)
+ DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
+ vc4->purgeable.size / 1024, vc4->purgeable.num);
+
+ if (vc4->purgeable.purged_num)
+ DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO",
+ vc4->purgeable.purged_size / 1024,
+ vc4->purgeable.purged_num);
+ mutex_unlock(&vc4->purgeable.lock);
+}
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int i;
+
+ mutex_lock(&vc4->bo_lock);
+ for (i = 0; i < vc4->num_labels; i++) {
+ if (!vc4->bo_labels[i].num_allocated)
+ continue;
+
+ seq_printf(m, "%30s: %6dkb BOs (%d)\n",
+ vc4->bo_labels[i].name,
+ vc4->bo_labels[i].size_allocated / 1024,
+ vc4->bo_labels[i].num_allocated);
+ }
+ mutex_unlock(&vc4->bo_lock);
+
+ mutex_lock(&vc4->purgeable.lock);
+ if (vc4->purgeable.num)
+ seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
+ vc4->purgeable.size / 1024, vc4->purgeable.num);
+
+ if (vc4->purgeable.purged_num)
+ seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "total purged BO",
+ vc4->purgeable.purged_size / 1024,
+ vc4->purgeable.purged_num);
+ mutex_unlock(&vc4->purgeable.lock);
+
+ return 0;
+}
+#endif
+
+/* Takes ownership of *name and returns the appropriate slot for it in
+ * the bo_labels[] array, extending it as necessary.
+ *
+ * This is inefficient and could use a hash table instead of walking
+ * an array and strcmp()ing. However, the assumption is that user
+ * labeling will be infrequent (scanout buffers and other long-lived
+ * objects, or debug driver builds), so we can live with it for now.
+ */
+static int vc4_get_user_label(struct vc4_dev *vc4, const char *name)
+{
+ int i;
+ int free_slot = -1;
+
+ for (i = 0; i < vc4->num_labels; i++) {
+ if (!vc4->bo_labels[i].name) {
+ free_slot = i;
+ } else if (strcmp(vc4->bo_labels[i].name, name) == 0) {
+ kfree(name);
+ return i;
+ }
+ }
+
+ if (free_slot != -1) {
+ WARN_ON(vc4->bo_labels[free_slot].num_allocated != 0);
+ vc4->bo_labels[free_slot].name = name;
+ return free_slot;
+ } else {
+ u32 new_label_count = vc4->num_labels + 1;
+ struct vc4_label *new_labels =
+ krealloc(vc4->bo_labels,
+ new_label_count * sizeof(*new_labels),
+ GFP_KERNEL);
+
+ if (!new_labels) {
+ kfree(name);
+ return -1;
+ }
+
+ free_slot = vc4->num_labels;
+ vc4->bo_labels = new_labels;
+ vc4->num_labels = new_label_count;
+
+ vc4->bo_labels[free_slot].name = name;
+ vc4->bo_labels[free_slot].num_allocated = 0;
+ vc4->bo_labels[free_slot].size_allocated = 0;
+
+ return free_slot;
+ }
+}
+
+static void vc4_bo_set_label(struct drm_gem_object *gem_obj, int label)
+{
+ struct vc4_bo *bo = to_vc4_bo(gem_obj);
+ struct vc4_dev *vc4 = to_vc4_dev(gem_obj->dev);
+
+ lockdep_assert_held(&vc4->bo_lock);
+
+ if (label != -1) {
+ vc4->bo_labels[label].num_allocated++;
+ vc4->bo_labels[label].size_allocated += gem_obj->size;
+ }
+
+ vc4->bo_labels[bo->label].num_allocated--;
+ vc4->bo_labels[bo->label].size_allocated -= gem_obj->size;
+
+ if (vc4->bo_labels[bo->label].num_allocated == 0 &&
+ is_user_label(bo->label)) {
+ /* Free user BO label slots on last unreference.
+ * Slots are just where we track the stats for a given
+ * name, and once a name is unused we can reuse that
+ * slot.
+ */
+ kfree(vc4->bo_labels[bo->label].name);
+ vc4->bo_labels[bo->label].name = NULL;
+ }
+
+ bo->label = label;
+}
+
+static uint32_t bo_page_index(size_t size)
+{
+ return (size / PAGE_SIZE) - 1;
+}
+
+static void vc4_bo_destroy(struct vc4_bo *bo)
+{
+ struct drm_gem_object *obj = &bo->base.base;
+ struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
+
+ lockdep_assert_held(&vc4->bo_lock);
+
+ vc4_bo_set_label(obj, -1);
+
+ if (bo->validated_shader) {
+ kfree(bo->validated_shader->uniform_addr_offsets);
+ kfree(bo->validated_shader->texture_samples);
+ kfree(bo->validated_shader);
+ bo->validated_shader = NULL;
+ }
+
+ reservation_object_fini(&bo->_resv);
+
+ drm_gem_cma_free_object(obj);
+}
+
+static void vc4_bo_remove_from_cache(struct vc4_bo *bo)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+ lockdep_assert_held(&vc4->bo_lock);
+ list_del(&bo->unref_head);
+ list_del(&bo->size_head);
+}
+
+static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev,
+ size_t size)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ uint32_t page_index = bo_page_index(size);
+
+ if (vc4->bo_cache.size_list_size <= page_index) {
+ uint32_t new_size = max(vc4->bo_cache.size_list_size * 2,
+ page_index + 1);
+ struct list_head *new_list;
+ uint32_t i;
+
+ new_list = kmalloc_array(new_size, sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!new_list)
+ return NULL;
+
+ /* Rebase the old cached BO lists to their new list
+ * head locations.
+ */
+ for (i = 0; i < vc4->bo_cache.size_list_size; i++) {
+ struct list_head *old_list =
+ &vc4->bo_cache.size_list[i];
+
+ if (list_empty(old_list))
+ INIT_LIST_HEAD(&new_list[i]);
+ else
+ list_replace(old_list, &new_list[i]);
+ }
+ /* And initialize the brand new BO list heads. */
+ for (i = vc4->bo_cache.size_list_size; i < new_size; i++)
+ INIT_LIST_HEAD(&new_list[i]);
+
+ kfree(vc4->bo_cache.size_list);
+ vc4->bo_cache.size_list = new_list;
+ vc4->bo_cache.size_list_size = new_size;
+ }
+
+ return &vc4->bo_cache.size_list[page_index];
+}
+
+static void vc4_bo_cache_purge(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ mutex_lock(&vc4->bo_lock);
+ while (!list_empty(&vc4->bo_cache.time_list)) {
+ struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
+ struct vc4_bo, unref_head);
+ vc4_bo_remove_from_cache(bo);
+ vc4_bo_destroy(bo);
+ }
+ mutex_unlock(&vc4->bo_lock);
+}
+
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+ mutex_lock(&vc4->purgeable.lock);
+ list_add_tail(&bo->size_head, &vc4->purgeable.list);
+ vc4->purgeable.num++;
+ vc4->purgeable.size += bo->base.base.size;
+ mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+ /* list_del_init() is used here because the caller might release
+ * the purgeable lock in order to acquire the madv one and update the
+ * madv status.
+ * During this short period of time a user might decide to mark
+ * the BO as unpurgeable, and if bo->madv is set to
+ * VC4_MADV_DONTNEED it will try to remove the BO from the
+ * purgeable list which will fail if the ->next/prev fields
+ * are set to LIST_POISON1/LIST_POISON2 (which is what
+ * list_del() does).
+ * Re-initializing the list element guarantees that list_del()
+ * will work correctly even if it's a NOP.
+ */
+ list_del_init(&bo->size_head);
+ vc4->purgeable.num--;
+ vc4->purgeable.size -= bo->base.base.size;
+}
+
+void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+ mutex_lock(&vc4->purgeable.lock);
+ vc4_bo_remove_from_purgeable_pool_locked(bo);
+ mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_purge(struct drm_gem_object *obj)
+{
+ struct vc4_bo *bo = to_vc4_bo(obj);
+ struct drm_device *dev = obj->dev;
+
+ WARN_ON(!mutex_is_locked(&bo->madv_lock));
+ WARN_ON(bo->madv != VC4_MADV_DONTNEED);
+
+ drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
+
+ dma_free_wc(dev->dev, obj->size, bo->base.vaddr, bo->base.paddr);
+ bo->base.vaddr = NULL;
+ bo->madv = __VC4_MADV_PURGED;
+}
+
+static void vc4_bo_userspace_cache_purge(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ mutex_lock(&vc4->purgeable.lock);
+ while (!list_empty(&vc4->purgeable.list)) {
+ struct vc4_bo *bo = list_first_entry(&vc4->purgeable.list,
+ struct vc4_bo, size_head);
+ struct drm_gem_object *obj = &bo->base.base;
+ size_t purged_size = 0;
+
+ vc4_bo_remove_from_purgeable_pool_locked(bo);
+
+ /* Release the purgeable lock while we're purging the BO so
+ * that other people can continue inserting things in the
+ * purgeable pool without having to wait for all BOs to be
+ * purged.
+ */
+ mutex_unlock(&vc4->purgeable.lock);
+ mutex_lock(&bo->madv_lock);
+
+ /* Since we released the purgeable pool lock before acquiring
+ * the BO madv one, the user may have marked the BO as WILLNEED
+ * and re-used it in the meantime.
+ * Before purging the BO we need to make sure
+ * - it is still marked as DONTNEED
+ * - it has not been re-inserted in the purgeable list
+ * - it is not used by HW blocks
+ * If one of these conditions is not met, just skip the entry.
+ */
+ if (bo->madv == VC4_MADV_DONTNEED &&
+ list_empty(&bo->size_head) &&
+ !refcount_read(&bo->usecnt)) {
+ purged_size = bo->base.base.size;
+ vc4_bo_purge(obj);
+ }
+ mutex_unlock(&bo->madv_lock);
+ mutex_lock(&vc4->purgeable.lock);
+
+ if (purged_size) {
+ vc4->purgeable.purged_size += purged_size;
+ vc4->purgeable.purged_num++;
+ }
+ }
+ mutex_unlock(&vc4->purgeable.lock);
+}
+
+static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
+ uint32_t size,
+ enum vc4_kernel_bo_type type)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ uint32_t page_index = bo_page_index(size);
+ struct vc4_bo *bo = NULL;
+
+ size = roundup(size, PAGE_SIZE);
+
+ mutex_lock(&vc4->bo_lock);
+ if (page_index >= vc4->bo_cache.size_list_size)
+ goto out;
+
+ if (list_empty(&vc4->bo_cache.size_list[page_index]))
+ goto out;
+
+ bo = list_first_entry(&vc4->bo_cache.size_list[page_index],
+ struct vc4_bo, size_head);
+ vc4_bo_remove_from_cache(bo);
+ kref_init(&bo->base.base.refcount);
+
+out:
+ if (bo)
+ vc4_bo_set_label(&bo->base.base, type);
+ mutex_unlock(&vc4->bo_lock);
+ return bo;
+}
+
+/**
+ * vc4_create_object - Implementation of driver->gem_create_object.
+ * @dev: DRM device
+ * @size: Size in bytes of the memory the object will reference
+ *
+ * This lets the CMA helpers allocate object structs for us, and keep
+ * our BO stats correct.
+ */
+struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_bo *bo;
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo)
+ return NULL;
+
+ bo->madv = VC4_MADV_WILLNEED;
+ refcount_set(&bo->usecnt, 0);
+ mutex_init(&bo->madv_lock);
+ mutex_lock(&vc4->bo_lock);
+ bo->label = VC4_BO_TYPE_KERNEL;
+ vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++;
+ vc4->bo_labels[VC4_BO_TYPE_KERNEL].size_allocated += size;
+ mutex_unlock(&vc4->bo_lock);
+ bo->resv = &bo->_resv;
+ reservation_object_init(bo->resv);
+
+ return &bo->base.base;
+}
+
+struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
+ bool allow_unzeroed, enum vc4_kernel_bo_type type)
+{
+ size_t size = roundup(unaligned_size, PAGE_SIZE);
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_gem_cma_object *cma_obj;
+ struct vc4_bo *bo;
+
+ if (size == 0)
+ return ERR_PTR(-EINVAL);
+
+ /* First, try to get a vc4_bo from the kernel BO cache. */
+ bo = vc4_bo_get_from_cache(dev, size, type);
+ if (bo) {
+ if (!allow_unzeroed)
+ memset(bo->base.vaddr, 0, bo->base.base.size);
+ return bo;
+ }
+
+ cma_obj = drm_gem_cma_create(dev, size);
+ if (IS_ERR(cma_obj)) {
+ /*
+ * If we've run out of CMA memory, kill the cache of
+ * CMA allocations we've got laying around and try again.
+ */
+ vc4_bo_cache_purge(dev);
+ cma_obj = drm_gem_cma_create(dev, size);
+ }
+
+ if (IS_ERR(cma_obj)) {
+ /*
+ * Still not enough CMA memory, purge the userspace BO
+ * cache and retry.
+ * This is sub-optimal since we purge the whole userspace
+ * BO cache, which forces users that want to re-use the BO to
+ * restore its initial content.
+ * Ideally, we should purge entries one by one and retry
+ * after each to see if CMA allocation succeeds. Or even
+ * better, try to find an entry with at least the same
+ * size.
+ */
+ vc4_bo_userspace_cache_purge(dev);
+ cma_obj = drm_gem_cma_create(dev, size);
+ }
+
+ if (IS_ERR(cma_obj)) {
+ DRM_ERROR("Failed to allocate from CMA:\n");
+ vc4_bo_stats_dump(vc4);
+ return ERR_PTR(-ENOMEM);
+ }
+ bo = to_vc4_bo(&cma_obj->base);
+
+ /* By default, BOs do not support the MADV ioctl. This will be enabled
+ * only on BOs that are exposed to userspace (V3D, V3D_SHADER and DUMB
+ * BOs).
+ */
+ bo->madv = __VC4_MADV_NOTSUPP;
+
+ mutex_lock(&vc4->bo_lock);
+ vc4_bo_set_label(&cma_obj->base, type);
+ mutex_unlock(&vc4->bo_lock);
+
+ return bo;
+}
+
+int vc4_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
+ struct vc4_bo *bo = NULL;
+ int ret;
+
+ if (args->pitch < min_pitch)
+ args->pitch = min_pitch;
+
+ if (args->size < args->pitch * args->height)
+ args->size = args->pitch * args->height;
+
+ bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_DUMB);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ bo->madv = VC4_MADV_WILLNEED;
+
+ ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+ drm_gem_object_put_unlocked(&bo->base.base);
+
+ return ret;
+}
+
+static void vc4_bo_cache_free_old(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ unsigned long expire_time = jiffies - msecs_to_jiffies(1000);
+
+ lockdep_assert_held(&vc4->bo_lock);
+
+ while (!list_empty(&vc4->bo_cache.time_list)) {
+ struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
+ struct vc4_bo, unref_head);
+ if (time_before(expire_time, bo->free_time)) {
+ mod_timer(&vc4->bo_cache.time_timer,
+ round_jiffies_up(jiffies +
+ msecs_to_jiffies(1000)));
+ return;
+ }
+
+ vc4_bo_remove_from_cache(bo);
+ vc4_bo_destroy(bo);
+ }
+}
+
+/* Called on the last userspace/kernel unreference of the BO. Returns
+ * it to the BO cache if possible, otherwise frees it.
+ */
+void vc4_free_object(struct drm_gem_object *gem_bo)
+{
+ struct drm_device *dev = gem_bo->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_bo *bo = to_vc4_bo(gem_bo);
+ struct list_head *cache_list;
+
+ /* Remove the BO from the purgeable list. */
+ mutex_lock(&bo->madv_lock);
+ if (bo->madv == VC4_MADV_DONTNEED && !refcount_read(&bo->usecnt))
+ vc4_bo_remove_from_purgeable_pool(bo);
+ mutex_unlock(&bo->madv_lock);
+
+ mutex_lock(&vc4->bo_lock);
+ /* If the object references someone else's memory, we can't cache it.
+ */
+ if (gem_bo->import_attach) {
+ vc4_bo_destroy(bo);
+ goto out;
+ }
+
+ /* Don't cache if it was publicly named. */
+ if (gem_bo->name) {
+ vc4_bo_destroy(bo);
+ goto out;
+ }
+
+ /* If this object was partially constructed but CMA allocation
+ * had failed, just free it. Can also happen when the BO has been
+ * purged.
+ */
+ if (!bo->base.vaddr) {
+ vc4_bo_destroy(bo);
+ goto out;
+ }
+
+ cache_list = vc4_get_cache_list_for_size(dev, gem_bo->size);
+ if (!cache_list) {
+ vc4_bo_destroy(bo);
+ goto out;
+ }
+
+ if (bo->validated_shader) {
+ kfree(bo->validated_shader->uniform_addr_offsets);
+ kfree(bo->validated_shader->texture_samples);
+ kfree(bo->validated_shader);
+ bo->validated_shader = NULL;
+ }
+
+ /* Reset madv and usecnt before adding the BO to the cache. */
+ bo->madv = __VC4_MADV_NOTSUPP;
+ refcount_set(&bo->usecnt, 0);
+
+ bo->t_format = false;
+ bo->free_time = jiffies;
+ list_add(&bo->size_head, cache_list);
+ list_add(&bo->unref_head, &vc4->bo_cache.time_list);
+
+ vc4_bo_set_label(&bo->base.base, VC4_BO_TYPE_KERNEL_CACHE);
+
+ vc4_bo_cache_free_old(dev);
+
+out:
+ mutex_unlock(&vc4->bo_lock);
+}
+
+static void vc4_bo_cache_time_work(struct work_struct *work)
+{
+ struct vc4_dev *vc4 =
+ container_of(work, struct vc4_dev, bo_cache.time_work);
+ struct drm_device *dev = vc4->dev;
+
+ mutex_lock(&vc4->bo_lock);
+ vc4_bo_cache_free_old(dev);
+ mutex_unlock(&vc4->bo_lock);
+}
+
+int vc4_bo_inc_usecnt(struct vc4_bo *bo)
+{
+ int ret;
+
+ /* Fast path: if the BO is already retained by someone, no need to
+ * check the madv status.
+ */
+ if (refcount_inc_not_zero(&bo->usecnt))
+ return 0;
+
+ mutex_lock(&bo->madv_lock);
+ switch (bo->madv) {
+ case VC4_MADV_WILLNEED:
+ if (!refcount_inc_not_zero(&bo->usecnt))
+ refcount_set(&bo->usecnt, 1);
+ ret = 0;
+ break;
+ case VC4_MADV_DONTNEED:
+ /* We shouldn't use a BO marked as purgeable if at least
+ * someone else retained its content by incrementing usecnt.
+ * Luckily the BO hasn't been purged yet, but something wrong
+ * is happening here. Just throw an error instead of
+ * authorizing this use case.
+ */
+ case __VC4_MADV_PURGED:
+ /* We can't use a purged BO. */
+ default:
+ /* Invalid madv value. */
+ ret = -EINVAL;
+ break;
+ }
+ mutex_unlock(&bo->madv_lock);
+
+ return ret;
+}
+
+void vc4_bo_dec_usecnt(struct vc4_bo *bo)
+{
+ /* Fast path: if the BO is still retained by someone, no need to test
+ * the madv value.
+ */
+ if (refcount_dec_not_one(&bo->usecnt))
+ return;
+
+ mutex_lock(&bo->madv_lock);
+ if (refcount_dec_and_test(&bo->usecnt) &&
+ bo->madv == VC4_MADV_DONTNEED)
+ vc4_bo_add_to_purgeable_pool(bo);
+ mutex_unlock(&bo->madv_lock);
+}
+
+static void vc4_bo_cache_time_timer(struct timer_list *t)
+{
+ struct vc4_dev *vc4 = from_timer(vc4, t, bo_cache.time_timer);
+
+ schedule_work(&vc4->bo_cache.time_work);
+}
+
+struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj)
+{
+ struct vc4_bo *bo = to_vc4_bo(obj);
+
+ return bo->resv;
+}
+
+struct dma_buf *
+vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
+{
+ struct vc4_bo *bo = to_vc4_bo(obj);
+ struct dma_buf *dmabuf;
+ int ret;
+
+ if (bo->validated_shader) {
+ DRM_DEBUG("Attempting to export shader BO\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Note: as soon as the BO is exported it becomes unpurgeable, because
+ * no one ever decrements the usecnt even if the reference held by the
+ * exported BO is released. This shouldn't be a problem since we don't
+ * expect exported BOs to be marked as purgeable.
+ */
+ ret = vc4_bo_inc_usecnt(bo);
+ if (ret) {
+ DRM_ERROR("Failed to increment BO usecnt\n");
+ return ERR_PTR(ret);
+ }
+
+ dmabuf = drm_gem_prime_export(dev, obj, flags);
+ if (IS_ERR(dmabuf))
+ vc4_bo_dec_usecnt(bo);
+
+ return dmabuf;
+}
+
+vm_fault_t vc4_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct vc4_bo *bo = to_vc4_bo(obj);
+
+ /* The only reason we would end up here is when user-space accesses
+ * the BO's memory after it's been purged.
+ */
+ mutex_lock(&bo->madv_lock);
+ WARN_ON(bo->madv != __VC4_MADV_PURGED);
+ mutex_unlock(&bo->madv_lock);
+
+ return VM_FAULT_SIGBUS;
+}
+
+int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct drm_gem_object *gem_obj;
+ unsigned long vm_pgoff;
+ struct vc4_bo *bo;
+ int ret;
+
+ ret = drm_gem_mmap(filp, vma);
+ if (ret)
+ return ret;
+
+ gem_obj = vma->vm_private_data;
+ bo = to_vc4_bo(gem_obj);
+
+ if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+ DRM_DEBUG("mmaping of shader BOs for writing not allowed.\n");
+ return -EINVAL;
+ }
+
+ if (bo->madv != VC4_MADV_WILLNEED) {
+ DRM_DEBUG("mmaping of %s BO not allowed\n",
+ bo->madv == VC4_MADV_DONTNEED ?
+ "purgeable" : "purged");
+ return -EINVAL;
+ }
+
+ /*
+ * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+ * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+ * the whole buffer.
+ */
+ vma->vm_flags &= ~VM_PFNMAP;
+
+ /* This ->vm_pgoff dance is needed to make all parties happy:
+ * - dma_mmap_wc() uses ->vm_pgoff as an offset within the allocated
+ * mem-region, hence the need to set it to zero (the value set by
+ * the DRM core is a virtual offset encoding the GEM object-id)
+ * - the mmap() core logic needs ->vm_pgoff to be restored to its
+ * initial value before returning from this function because it
+ * encodes the offset of this GEM in the dev->anon_inode pseudo-file
+ * and this information will be used when we invalidate userspace
+ * mappings with drm_vma_node_unmap() (called from vc4_gem_purge()).
+ */
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
+ bo->base.paddr, vma->vm_end - vma->vm_start);
+ vma->vm_pgoff = vm_pgoff;
+
+ if (ret)
+ drm_gem_vm_close(vma);
+
+ return ret;
+}
+
+int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct vc4_bo *bo = to_vc4_bo(obj);
+
+ if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+ DRM_DEBUG("mmaping of shader BOs for writing not allowed.\n");
+ return -EINVAL;
+ }
+
+ return drm_gem_cma_prime_mmap(obj, vma);
+}
+
+void *vc4_prime_vmap(struct drm_gem_object *obj)
+{
+ struct vc4_bo *bo = to_vc4_bo(obj);
+
+ if (bo->validated_shader) {
+ DRM_DEBUG("mmaping of shader BOs not allowed.\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return drm_gem_cma_prime_vmap(obj);
+}
+
+struct drm_gem_object *
+vc4_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt)
+{
+ struct drm_gem_object *obj;
+ struct vc4_bo *bo;
+
+ obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
+ if (IS_ERR(obj))
+ return obj;
+
+ bo = to_vc4_bo(obj);
+ bo->resv = attach->dmabuf->resv;
+
+ return obj;
+}
+
+int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_vc4_create_bo *args = data;
+ struct vc4_bo *bo = NULL;
+ int ret;
+
+ /*
+ * Request zeroed memory (allow_unzeroed = false): BOs coming from
+ * the kernel BO cache may contain stale data from a previous user,
+ * and that data must not leak between users.
+ */
+ bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_V3D);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ bo->madv = VC4_MADV_WILLNEED;
+
+ ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+ drm_gem_object_put_unlocked(&bo->base.base);
+
+ return ret;
+}
+
+int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_vc4_mmap_bo *args = data;
+ struct drm_gem_object *gem_obj;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -EINVAL;
+ }
+
+ /* The mmap offset was set up at BO allocation time. */
+ args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+ drm_gem_object_put_unlocked(gem_obj);
+ return 0;
+}
+
+int
+vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_vc4_create_shader_bo *args = data;
+ struct vc4_bo *bo = NULL;
+ int ret;
+
+ if (args->size == 0)
+ return -EINVAL;
+
+ if (args->size % sizeof(u64) != 0)
+ return -EINVAL;
+
+ if (args->flags != 0) {
+ DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
+ return -EINVAL;
+ }
+
+ if (args->pad != 0) {
+ DRM_INFO("Pad set: 0x%08x\n", args->pad);
+ return -EINVAL;
+ }
+
+ bo = vc4_bo_create(dev, args->size, true, VC4_BO_TYPE_V3D_SHADER);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ bo->madv = VC4_MADV_WILLNEED;
+
+ if (copy_from_user(bo->base.vaddr,
+ (void __user *)(uintptr_t)args->data,
+ args->size)) {
+ ret = -EFAULT;
+ goto fail;
+ }
+ /* Clear the rest of the memory, which may contain stale data if
+ * the BO came from the BO cache.
+ */
+ memset(bo->base.vaddr + args->size, 0,
+ bo->base.base.size - args->size);
+
+ bo->validated_shader = vc4_validate_shader(&bo->base);
+ if (!bo->validated_shader) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* We have to create the handle after validation, to avoid
+ * races where users could do things like mmap the shader BO.
+ */
+ ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+
+ fail:
+ drm_gem_object_put_unlocked(&bo->base.base);
+
+ return ret;
+}
+
+/**
+ * vc4_set_tiling_ioctl() - Sets the tiling modifier for a BO.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * The tiling state of the BO decides the default modifier of an fb if
+ * no specific modifier was set by userspace, and the return value of
+ * vc4_get_tiling_ioctl() (so that userspace can treat a BO it
+ * received from dmabuf as the same tiling format as the producer
+ * used).
+ */
+int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_vc4_set_tiling *args = data;
+ struct drm_gem_object *gem_obj;
+ struct vc4_bo *bo;
+ bool t_format;
+
+ if (args->flags != 0)
+ return -EINVAL;
+
+ switch (args->modifier) {
+ case DRM_FORMAT_MOD_NONE:
+ t_format = false;
+ break;
+ case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
+ t_format = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -ENOENT;
+ }
+ bo = to_vc4_bo(gem_obj);
+ bo->t_format = t_format;
+
+ drm_gem_object_put_unlocked(gem_obj);
+
+ return 0;
+}
+
+/**
+ * vc4_get_tiling_ioctl() - Gets the tiling modifier for a BO.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * Returns the tiling modifier for a BO as set by vc4_set_tiling_ioctl().
+ */
+int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_vc4_get_tiling *args = data;
+ struct drm_gem_object *gem_obj;
+ struct vc4_bo *bo;
+
+ if (args->flags != 0 || args->modifier != 0)
+ return -EINVAL;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -ENOENT;
+ }
+ bo = to_vc4_bo(gem_obj);
+
+ if (bo->t_format)
+ args->modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED;
+ else
+ args->modifier = DRM_FORMAT_MOD_NONE;
+
+ drm_gem_object_put_unlocked(gem_obj);
+
+ return 0;
+}
+
+int vc4_bo_cache_init(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int i;
+
+ /* Create the initial set of BO labels that the kernel will
+ * use. This lets us avoid a bunch of string reallocation in
+ * the kernel's draw and BO allocation paths.
+ */
+ vc4->bo_labels = kcalloc(VC4_BO_TYPE_COUNT, sizeof(*vc4->bo_labels),
+ GFP_KERNEL);
+ if (!vc4->bo_labels)
+ return -ENOMEM;
+ vc4->num_labels = VC4_BO_TYPE_COUNT;
+
+ BUILD_BUG_ON(ARRAY_SIZE(bo_type_names) != VC4_BO_TYPE_COUNT);
+ for (i = 0; i < VC4_BO_TYPE_COUNT; i++)
+ vc4->bo_labels[i].name = bo_type_names[i];
+
+ mutex_init(&vc4->bo_lock);
+
+ INIT_LIST_HEAD(&vc4->bo_cache.time_list);
+
+ INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
+ timer_setup(&vc4->bo_cache.time_timer, vc4_bo_cache_time_timer, 0);
+
+ return 0;
+}
+
+void vc4_bo_cache_destroy(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int i;
+
+ del_timer(&vc4->bo_cache.time_timer);
+ cancel_work_sync(&vc4->bo_cache.time_work);
+
+ vc4_bo_cache_purge(dev);
+
+ for (i = 0; i < vc4->num_labels; i++) {
+ if (vc4->bo_labels[i].num_allocated) {
+ DRM_ERROR("Destroying BO cache with %d %s "
+ "BOs still allocated\n",
+ vc4->bo_labels[i].num_allocated,
+ vc4->bo_labels[i].name);
+ }
+
+ if (is_user_label(i))
+ kfree(vc4->bo_labels[i].name);
+ }
+ kfree(vc4->bo_labels);
+}
+
+int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_vc4_label_bo *args = data;
+ char *name;
+ struct drm_gem_object *gem_obj;
+ int ret = 0, label;
+
+ if (!args->len)
+ return -EINVAL;
+
+ name = strndup_user(u64_to_user_ptr(args->name), args->len + 1);
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+ kfree(name);
+ return -ENOENT;
+ }
+
+ mutex_lock(&vc4->bo_lock);
+ label = vc4_get_user_label(vc4, name);
+ if (label != -1)
+ vc4_bo_set_label(gem_obj, label);
+ else
+ ret = -ENOMEM;
+ mutex_unlock(&vc4->bo_lock);
+
+ drm_gem_object_put_unlocked(gem_obj);
+
+ return ret;
+}
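
The file above also defines the userspace-facing BO API (vc4_create_bo_ioctl(), vc4_mmap_bo_ioctl(), and the vc4_mmap() path backing the resulting mapping). A minimal, hypothetical userspace sketch of that allocate-and-map flow follows; the <drm/vc4_drm.h> include path and the /dev/dri/card0 node are assumptions, and error handling is reduced to the bare minimum:

/* Hypothetical userspace sketch (not part of this patch): allocate a
 * V3D BO and map it for CPU access, mirroring vc4_create_bo_ioctl(),
 * vc4_mmap_bo_ioctl() and vc4_mmap() above.  Header path and device
 * node are assumptions.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/vc4_drm.h>

int main(void)
{
	struct drm_vc4_create_bo create = { .size = 4096 };
	struct drm_vc4_mmap_bo map = { 0 };
	int fd = open("/dev/dri/card0", O_RDWR);
	void *ptr;

	if (fd < 0)
		return 1;

	/* Allocate a page-sized BO; the kernel returns a GEM handle. */
	if (ioctl(fd, DRM_IOCTL_VC4_CREATE_BO, &create))
		return 1;

	/* The fake mmap offset was set up at BO allocation time; this
	 * ioctl only reports it back (see vc4_mmap_bo_ioctl()).
	 */
	map.handle = create.handle;
	if (ioctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map))
		return 1;

	ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, map.offset);
	if (ptr == MAP_FAILED)
		return 1;

	memset(ptr, 0, create.size);	/* CPU writes land in the CMA backing store */
	munmap(ptr, create.size);
	return 0;
}

The two-ioctl split mirrors the kernel side: the mmap offset is created when the BO is allocated, and DRM_IOCTL_VC4_MMAP_BO only reports it back for use with mmap() on the DRM fd.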
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
new file mode 100644
index 000000000..5615ceb15
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -0,0 +1,1254 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**
+ * DOC: VC4 CRTC module
+ *
+ * In VC4, the Pixel Valve is what most closely corresponds to the
+ * DRM's concept of a CRTC. The PV generates video timings from the
+ * encoder's clock plus its configuration. It pulls scaled pixels from
+ * the HVS at that timing, and feeds them to the encoder.
+ *
+ * However, the DRM CRTC also collects the configuration of all the
+ * DRM planes attached to it. As a result, the CRTC is also
+ * responsible for writing the display list for the HVS channel that
+ * the CRTC will use.
+ *
+ * The 2835 has 3 different pixel valves. pv0 in the audio power
+ * domain feeds DSI0 or DPI, while pv1 feeds DSI1 or SMI. pv2 in the
+ * image domain can feed either HDMI or the SDTV controller. The
+ * pixel valve chooses from the CPRMAN clocks (HSM for HDMI, VEC for
+ * SDTV, etc.) according to which output type is chosen in the mux.
+ *
+ * For power management, the pixel valve's registers are all clocked
+ * by the AXI clock, while the timings and FIFOs make use of the
+ * output-specific clock. Since the encoders also directly consume
+ * the CPRMAN clocks, and know what timings they need, they are the
+ * ones that set the clock.
+ */
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <linux/clk.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <linux/component.h>
+#include <linux/of_device.h>
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+struct vc4_crtc_state {
+ struct drm_crtc_state base;
+ /* Dlist area for this CRTC configuration. */
+ struct drm_mm_node mm;
+ bool feed_txp;
+ bool txp_armed;
+};
+
+static inline struct vc4_crtc_state *
+to_vc4_crtc_state(struct drm_crtc_state *crtc_state)
+{
+ return (struct vc4_crtc_state *)crtc_state;
+}
+
+#define CRTC_WRITE(offset, val) writel(val, vc4_crtc->regs + (offset))
+#define CRTC_READ(offset) readl(vc4_crtc->regs + (offset))
+
+#define CRTC_REG(reg) { reg, #reg }
+static const struct {
+ u32 reg;
+ const char *name;
+} crtc_regs[] = {
+ CRTC_REG(PV_CONTROL),
+ CRTC_REG(PV_V_CONTROL),
+ CRTC_REG(PV_VSYNCD_EVEN),
+ CRTC_REG(PV_HORZA),
+ CRTC_REG(PV_HORZB),
+ CRTC_REG(PV_VERTA),
+ CRTC_REG(PV_VERTB),
+ CRTC_REG(PV_VERTA_EVEN),
+ CRTC_REG(PV_VERTB_EVEN),
+ CRTC_REG(PV_INTEN),
+ CRTC_REG(PV_INTSTAT),
+ CRTC_REG(PV_STAT),
+ CRTC_REG(PV_HACT_ACT),
+};
+
+static void vc4_crtc_dump_regs(struct vc4_crtc *vc4_crtc)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(crtc_regs); i++) {
+ DRM_INFO("0x%04x (%s): 0x%08x\n",
+ crtc_regs[i].reg, crtc_regs[i].name,
+ CRTC_READ(crtc_regs[i].reg));
+ }
+}
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ int crtc_index = (uintptr_t)node->info_ent->data;
+ struct drm_crtc *crtc;
+ struct vc4_crtc *vc4_crtc;
+ int i;
+
+ i = 0;
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ if (i == crtc_index)
+ break;
+ i++;
+ }
+ if (!crtc)
+ return 0;
+ vc4_crtc = to_vc4_crtc(crtc);
+
+ for (i = 0; i < ARRAY_SIZE(crtc_regs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ crtc_regs[i].name, crtc_regs[i].reg,
+ CRTC_READ(crtc_regs[i].reg));
+ }
+
+ return 0;
+}
+#endif
+
+bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
+ bool in_vblank_irq, int *vpos, int *hpos,
+ ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_crtc *crtc = drm_crtc_from_index(dev, crtc_id);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ u32 val;
+ int fifo_lines;
+ int vblank_lines;
+ bool ret = false;
+
+ /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+
+ /* Get optional system timestamp before query. */
+ if (stime)
+ *stime = ktime_get();
+
+ /*
+ * Read vertical scanline which is currently composed for our
+ * pixelvalve by the HVS, and also the scaler status.
+ */
+ val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel));
+
+ /* Get optional system timestamp after query. */
+ if (etime)
+ *etime = ktime_get();
+
+ /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+
+ /* Vertical position of hvs composed scanline. */
+ *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
+ *hpos = 0;
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+ *vpos /= 2;
+
+ /* Use hpos to correct for field offset in interlaced mode. */
+ if (VC4_GET_FIELD(val, SCALER_DISPSTATX_FRAME_COUNT) % 2)
+ *hpos += mode->crtc_htotal / 2;
+ }
+
+ /* This is the offset we need for translating hvs -> pv scanout pos. */
+ fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay;
+
+ if (fifo_lines > 0)
+ ret = true;
+
+ /* HVS more than fifo_lines into frame for compositing? */
+ if (*vpos > fifo_lines) {
+ /*
+ * We are in active scanout and can get some meaningful results
+ * from HVS. The actual PV scanout can not trail behind more
+ * than fifo_lines as that is the fifo's capacity. Assume that
+ * in active scanout the HVS and PV work in lockstep wrt. HVS
+ * refilling the fifo and PV consuming from the fifo, ie.
+ * whenever the PV consumes and frees up a scanline in the
+ * fifo, the HVS will immediately refill it, therefore
+ * incrementing vpos. Therefore we choose HVS read position -
+ * fifo size in scanlines as an estimate of the real scanout
+ * position of the PV.
+ */
+ *vpos -= fifo_lines + 1;
+
+ return ret;
+ }
+
+ /*
+ * Less: This happens when we are in vblank and the HVS, after getting
+ * the VSTART restart signal from the PV, just started refilling its
+ * fifo with new lines from the top-most lines of the new framebuffers.
+ * The PV does not scan out in vblank, so does not remove lines from
+ * the fifo, so the fifo will be full quickly and the HVS has to pause.
+ * We can't get meaningful readings wrt. scanline position of the PV
+ * and need to make things up in an approximate but consistent way.
+ */
+ vblank_lines = mode->vtotal - mode->vdisplay;
+
+ if (in_vblank_irq) {
+ /*
+ * Assume the irq handler got called close to first
+ * line of vblank, so PV has about a full vblank
+ * scanlines to go, and as a base timestamp use the
+ * one taken at entry into vblank irq handler, so it
+ * is not affected by random delays due to lock
+ * contention on event_lock or vblank_time lock in
+ * the core.
+ */
+ *vpos = -vblank_lines;
+
+ if (stime)
+ *stime = vc4_crtc->t_vblank;
+ if (etime)
+ *etime = vc4_crtc->t_vblank;
+
+ /*
+ * If the HVS fifo is not yet full then we know for certain
+ * we are at the very beginning of vblank, as the hvs just
+ * started refilling, and the stime and etime timestamps
+ * truly correspond to start of vblank.
+ *
+ * Unfortunately there's no way to report this to upper levels
+ * and make it more useful.
+ */
+ } else {
+ /*
+ * No clue where we are inside vblank. Return a vpos of zero,
+ * which will cause calling code to just return the etime
+ * timestamp uncorrected. At least this is no worse than the
+ * standard fallback.
+ */
+ *vpos = 0;
+ }
+
+ return ret;
+}
+
+static void vc4_crtc_destroy(struct drm_crtc *crtc)
+{
+ drm_crtc_cleanup(crtc);
+}
+
+static void
+vc4_crtc_lut_load(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ u32 i;
+
+ /* The LUT memory is laid out with each HVS channel in order,
+ * each of which takes 256 writes for R, 256 for G, then 256
+ * for B.
+ */
+ HVS_WRITE(SCALER_GAMADDR,
+ SCALER_GAMADDR_AUTOINC |
+ (vc4_crtc->channel * 3 * crtc->gamma_size));
+
+ for (i = 0; i < crtc->gamma_size; i++)
+ HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_r[i]);
+ for (i = 0; i < crtc->gamma_size; i++)
+ HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_g[i]);
+ for (i = 0; i < crtc->gamma_size; i++)
+ HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]);
+}
+
+static void
+vc4_crtc_update_gamma_lut(struct drm_crtc *crtc)
+{
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ struct drm_color_lut *lut = crtc->state->gamma_lut->data;
+ u32 length = drm_color_lut_size(crtc->state->gamma_lut);
+ u32 i;
+
+ for (i = 0; i < length; i++) {
+ vc4_crtc->lut_r[i] = drm_color_lut_extract(lut[i].red, 8);
+ vc4_crtc->lut_g[i] = drm_color_lut_extract(lut[i].green, 8);
+ vc4_crtc->lut_b[i] = drm_color_lut_extract(lut[i].blue, 8);
+ }
+
+ vc4_crtc_lut_load(crtc);
+}
+
+static u32 vc4_get_fifo_full_level(u32 format)
+{
+ static const u32 fifo_len_bytes = 64;
+ static const u32 hvs_latency_pix = 6;
+
+ switch (format) {
+ case PV_CONTROL_FORMAT_DSIV_16:
+ case PV_CONTROL_FORMAT_DSIC_16:
+ return fifo_len_bytes - 2 * hvs_latency_pix;
+ case PV_CONTROL_FORMAT_DSIV_18:
+ return fifo_len_bytes - 14;
+ case PV_CONTROL_FORMAT_24:
+ case PV_CONTROL_FORMAT_DSIV_24:
+ default:
+ return fifo_len_bytes - 3 * hvs_latency_pix;
+ }
+}
+
+/*
+ * Returns the encoder attached to the CRTC.
+ *
+ * VC4 can only scan out to one encoder at a time, while the DRM core
+ * allows drivers to push pixels to more than one encoder from the
+ * same CRTC.
+ */
+static struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc)
+{
+ struct drm_connector *connector;
+ struct drm_connector_list_iter conn_iter;
+
+ drm_connector_list_iter_begin(crtc->dev, &conn_iter);
+ drm_for_each_connector_iter(connector, &conn_iter) {
+ if (connector->state->crtc == crtc) {
+ drm_connector_list_iter_end(&conn_iter);
+ return connector->encoder;
+ }
+ }
+ drm_connector_list_iter_end(&conn_iter);
+
+ return NULL;
+}
+
+static void vc4_crtc_config_pv(struct drm_crtc *crtc)
+{
+ struct drm_encoder *encoder = vc4_get_crtc_encoder(crtc);
+ struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ struct drm_crtc_state *state = crtc->state;
+ struct drm_display_mode *mode = &state->adjusted_mode;
+ bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
+ u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1;
+ bool is_dsi = (vc4_encoder->type == VC4_ENCODER_TYPE_DSI0 ||
+ vc4_encoder->type == VC4_ENCODER_TYPE_DSI1);
+ u32 format = is_dsi ? PV_CONTROL_FORMAT_DSIV_24 : PV_CONTROL_FORMAT_24;
+
+ /* Reset the PV fifo. */
+ CRTC_WRITE(PV_CONTROL, 0);
+ CRTC_WRITE(PV_CONTROL, PV_CONTROL_FIFO_CLR | PV_CONTROL_EN);
+ CRTC_WRITE(PV_CONTROL, 0);
+
+ CRTC_WRITE(PV_HORZA,
+ VC4_SET_FIELD((mode->htotal -
+ mode->hsync_end) * pixel_rep,
+ PV_HORZA_HBP) |
+ VC4_SET_FIELD((mode->hsync_end -
+ mode->hsync_start) * pixel_rep,
+ PV_HORZA_HSYNC));
+ CRTC_WRITE(PV_HORZB,
+ VC4_SET_FIELD((mode->hsync_start -
+ mode->hdisplay) * pixel_rep,
+ PV_HORZB_HFP) |
+ VC4_SET_FIELD(mode->hdisplay * pixel_rep, PV_HORZB_HACTIVE));
+
+ CRTC_WRITE(PV_VERTA,
+ VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end,
+ PV_VERTA_VBP) |
+ VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start,
+ PV_VERTA_VSYNC));
+ CRTC_WRITE(PV_VERTB,
+ VC4_SET_FIELD(mode->crtc_vsync_start - mode->crtc_vdisplay,
+ PV_VERTB_VFP) |
+ VC4_SET_FIELD(mode->crtc_vdisplay, PV_VERTB_VACTIVE));
+
+ if (interlace) {
+ CRTC_WRITE(PV_VERTA_EVEN,
+ VC4_SET_FIELD(mode->crtc_vtotal -
+ mode->crtc_vsync_end - 1,
+ PV_VERTA_VBP) |
+ VC4_SET_FIELD(mode->crtc_vsync_end -
+ mode->crtc_vsync_start,
+ PV_VERTA_VSYNC));
+ CRTC_WRITE(PV_VERTB_EVEN,
+ VC4_SET_FIELD(mode->crtc_vsync_start -
+ mode->crtc_vdisplay,
+ PV_VERTB_VFP) |
+ VC4_SET_FIELD(mode->crtc_vdisplay, PV_VERTB_VACTIVE));
+
+ /* We set up first field even mode for HDMI. VEC's
+ * NTSC mode would want first field odd instead, once
+ * we support it (to do so, set ODD_FIRST and put the
+ * delay in VSYNCD_EVEN instead).
+ */
+ CRTC_WRITE(PV_V_CONTROL,
+ PV_VCONTROL_CONTINUOUS |
+ (is_dsi ? PV_VCONTROL_DSI : 0) |
+ PV_VCONTROL_INTERLACE |
+ VC4_SET_FIELD(mode->htotal * pixel_rep / 2,
+ PV_VCONTROL_ODD_DELAY));
+ CRTC_WRITE(PV_VSYNCD_EVEN, 0);
+ } else {
+ CRTC_WRITE(PV_V_CONTROL,
+ PV_VCONTROL_CONTINUOUS |
+ (is_dsi ? PV_VCONTROL_DSI : 0));
+ }
+
+ CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep);
+
+ CRTC_WRITE(PV_CONTROL,
+ VC4_SET_FIELD(format, PV_CONTROL_FORMAT) |
+ VC4_SET_FIELD(vc4_get_fifo_full_level(format),
+ PV_CONTROL_FIFO_LEVEL) |
+ VC4_SET_FIELD(pixel_rep - 1, PV_CONTROL_PIXEL_REP) |
+ PV_CONTROL_CLR_AT_START |
+ PV_CONTROL_TRIGGER_UNDERFLOW |
+ PV_CONTROL_WAIT_HSTART |
+ VC4_SET_FIELD(vc4_encoder->clock_select,
+ PV_CONTROL_CLK_SELECT) |
+ PV_CONTROL_FIFO_CLR |
+ PV_CONTROL_EN);
+}
+
+static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+ struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+ bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
+ bool debug_dump_regs = false;
+
+ if (debug_dump_regs) {
+ DRM_INFO("CRTC %d regs before:\n", drm_crtc_index(crtc));
+ vc4_crtc_dump_regs(vc4_crtc);
+ }
+
+ if (vc4_crtc->channel == 2) {
+ u32 dispctrl;
+ u32 dsp3_mux;
+
+ /*
+ * SCALER_DISPCTRL_DSP3 = X, where X < 2 means 'connect DSP3 to
+ * FIFO X'.
+ * SCALER_DISPCTRL_DSP3 = 3 means 'disable DSP 3'.
+ *
+ * DSP3 is connected to FIFO2 unless the transposer is
+ * enabled. In this case, FIFO 2 is directly accessed by the
+ * TXP IP, and we need to disable the FIFO2 -> pixelvalve1
+ * route.
+ */
+ if (vc4_state->feed_txp)
+ dsp3_mux = VC4_SET_FIELD(3, SCALER_DISPCTRL_DSP3_MUX);
+ else
+ dsp3_mux = VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX);
+
+ dispctrl = HVS_READ(SCALER_DISPCTRL) &
+ ~SCALER_DISPCTRL_DSP3_MUX_MASK;
+ HVS_WRITE(SCALER_DISPCTRL, dispctrl | dsp3_mux);
+ }
+
+ if (!vc4_state->feed_txp)
+ vc4_crtc_config_pv(crtc);
+
+ HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel),
+ SCALER_DISPBKGND_AUTOHS |
+ SCALER_DISPBKGND_GAMMA |
+ (interlace ? SCALER_DISPBKGND_INTERLACE : 0));
+
+ /* Reload the LUT, since the SRAMs would have been disabled if
+ * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
+ */
+ vc4_crtc_lut_load(crtc);
+
+ if (debug_dump_regs) {
+ DRM_INFO("CRTC %d regs after:\n", drm_crtc_index(crtc));
+ vc4_crtc_dump_regs(vc4_crtc);
+ }
+}
+
+static void require_hvs_enabled(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ WARN_ON_ONCE((HVS_READ(SCALER_DISPCTRL) & SCALER_DISPCTRL_ENABLE) !=
+ SCALER_DISPCTRL_ENABLE);
+}
+
+static void vc4_crtc_atomic_disable(struct drm_crtc *crtc,
+ struct drm_crtc_state *old_state)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ u32 chan = vc4_crtc->channel;
+ int ret;
+ require_hvs_enabled(dev);
+
+ /* Disable vblank irq handling before crtc is disabled. */
+ drm_crtc_vblank_off(crtc);
+
+ CRTC_WRITE(PV_V_CONTROL,
+ CRTC_READ(PV_V_CONTROL) & ~PV_VCONTROL_VIDEN);
+ ret = wait_for(!(CRTC_READ(PV_V_CONTROL) & PV_VCONTROL_VIDEN), 1);
+ WARN_ONCE(ret, "Timeout waiting for !PV_VCONTROL_VIDEN\n");
+
+ if (HVS_READ(SCALER_DISPCTRLX(chan)) &
+ SCALER_DISPCTRLX_ENABLE) {
+ HVS_WRITE(SCALER_DISPCTRLX(chan),
+ SCALER_DISPCTRLX_RESET);
+
+ /* While the docs say that reset is self-clearing, it
+ * seems it doesn't actually.
+ */
+ HVS_WRITE(SCALER_DISPCTRLX(chan), 0);
+ }
+
+ /* Once we leave, the scaler should be disabled and its fifo empty. */
+
+ WARN_ON_ONCE(HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_RESET);
+
+ WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(chan)),
+ SCALER_DISPSTATX_MODE) !=
+ SCALER_DISPSTATX_MODE_DISABLED);
+
+ WARN_ON_ONCE((HVS_READ(SCALER_DISPSTATX(chan)) &
+ (SCALER_DISPSTATX_FULL | SCALER_DISPSTATX_EMPTY)) !=
+ SCALER_DISPSTATX_EMPTY);
+
+ /*
+ * Make sure we issue a vblank event after disabling the CRTC if
+ * someone was waiting on it.
+ */
+ if (crtc->state->event) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+ crtc->state->event = NULL;
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+}
+
+void vc4_crtc_txp_armed(struct drm_crtc_state *state)
+{
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
+
+ vc4_state->txp_armed = true;
+}
+
+static void vc4_crtc_update_dlist(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+
+ if (crtc->state->event) {
+ unsigned long flags;
+
+ crtc->state->event->pipe = drm_crtc_index(crtc);
+
+ WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+
+ if (!vc4_state->feed_txp || vc4_state->txp_armed) {
+ vc4_crtc->event = crtc->state->event;
+ crtc->state->event = NULL;
+ }
+
+ HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+ vc4_state->mm.start);
+
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ } else {
+ HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+ vc4_state->mm.start);
+ }
+}
+
+static void vc4_crtc_atomic_enable(struct drm_crtc *crtc,
+ struct drm_crtc_state *old_state)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+ struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+
+ require_hvs_enabled(dev);
+
+ /* Enable vblank irq handling before crtc is started otherwise
+ * drm_crtc_vblank_get() fails in vc4_crtc_update_dlist().
+ */
+ drm_crtc_vblank_on(crtc);
+ vc4_crtc_update_dlist(crtc);
+
+ /* Turn on the scaler, which will wait for vstart to start
+ * compositing.
+ * When feeding the transposer, we should operate in oneshot
+ * mode.
+ */
+ HVS_WRITE(SCALER_DISPCTRLX(vc4_crtc->channel),
+ VC4_SET_FIELD(mode->hdisplay, SCALER_DISPCTRLX_WIDTH) |
+ VC4_SET_FIELD(mode->vdisplay, SCALER_DISPCTRLX_HEIGHT) |
+ SCALER_DISPCTRLX_ENABLE |
+ (vc4_state->feed_txp ? SCALER_DISPCTRLX_ONESHOT : 0));
+
+ /* When feeding the transposer block the pixelvalve is unneeded and
+ * should not be enabled.
+ */
+ if (!vc4_state->feed_txp)
+ CRTC_WRITE(PV_V_CONTROL,
+ CRTC_READ(PV_V_CONTROL) | PV_VCONTROL_VIDEN);
+}
+
+static enum drm_mode_status vc4_crtc_mode_valid(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode)
+{
+ /* Do not allow doublescan modes from user space */
+ if (mode->flags & DRM_MODE_FLAG_DBLSCAN) {
+ DRM_DEBUG_KMS("[CRTC:%d] Doublescan mode rejected.\n",
+ crtc->base.id);
+ return MODE_NO_DBLESCAN;
+ }
+
+ return MODE_OK;
+}
+
+static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
+ struct drm_crtc_state *state)
+{
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_plane *plane;
+ unsigned long flags;
+ const struct drm_plane_state *plane_state;
+ struct drm_connector *conn;
+ struct drm_connector_state *conn_state;
+ u32 dlist_count = 0;
+ int ret, i;
+
+ /* The pixelvalve can only feed one encoder (and encoders are
+ * 1:1 with connectors.)
+ */
+ if (hweight32(state->connector_mask) > 1)
+ return -EINVAL;
+
+ drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, state)
+ dlist_count += vc4_plane_dlist_size(plane_state);
+
+ dlist_count++; /* Account for SCALER_CTL0_END. */
+
+ spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
+ ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm,
+ dlist_count);
+ spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
+ if (ret)
+ return ret;
+
+ for_each_new_connector_in_state(state->state, conn, conn_state, i) {
+ if (conn_state->crtc != crtc)
+ continue;
+
+ /* The writeback connector is implemented using the transposer
+ * block, which takes its data directly from the HVS FIFO.
+ */
+ if (conn->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) {
+ state->no_vblank = true;
+ vc4_state->feed_txp = true;
+ } else {
+ state->no_vblank = false;
+ vc4_state->feed_txp = false;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_crtc_state *old_state)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+ struct drm_plane *plane;
+ struct vc4_plane_state *vc4_plane_state;
+ bool debug_dump_regs = false;
+ bool enable_bg_fill = false;
+ u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start;
+ u32 __iomem *dlist_next = dlist_start;
+
+ if (debug_dump_regs) {
+ DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc));
+ vc4_hvs_dump_state(dev);
+ }
+
+ /* Copy all the active planes' dlist contents to the hardware dlist. */
+ drm_atomic_crtc_for_each_plane(plane, crtc) {
+ /* Is this the first active plane? */
+ if (dlist_next == dlist_start) {
+ /* We need to enable background fill when a plane
+ * could be alpha blending from the background, i.e.
+ * where no other plane is underneath. It suffices to
+ * consider the first active plane here since we set
+ * needs_bg_fill such that either the first plane
+ * already needs it or all planes on top blend from
+ * the first or a lower plane.
+ */
+ vc4_plane_state = to_vc4_plane_state(plane->state);
+ enable_bg_fill = vc4_plane_state->needs_bg_fill;
+ }
+
+ dlist_next += vc4_plane_write_dlist(plane, dlist_next);
+ }
+
+ writel(SCALER_CTL0_END, dlist_next);
+ dlist_next++;
+
+ WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
+
+ if (enable_bg_fill)
+ /* This sets a black background color fill, as is the case
+ * with other DRM drivers.
+ */
+ HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel),
+ HVS_READ(SCALER_DISPBKGNDX(vc4_crtc->channel)) |
+ SCALER_DISPBKGND_FILL);
+
+ /* Only update DISPLIST if the CRTC was already running and is not
+ * being disabled.
+ * vc4_crtc_atomic_enable() takes care of updating the dlist just after
+ * re-enabling VBLANK interrupts and before enabling the engine.
+ * If the CRTC is being disabled, there's no point in updating this
+ * information.
+ */
+ if (crtc->state->active && old_state->active)
+ vc4_crtc_update_dlist(crtc);
+
+ if (crtc->state->color_mgmt_changed) {
+ u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(vc4_crtc->channel));
+
+ if (crtc->state->gamma_lut) {
+ vc4_crtc_update_gamma_lut(crtc);
+ dispbkgndx |= SCALER_DISPBKGND_GAMMA;
+ } else {
+ /* Unsetting DISPBKGND_GAMMA skips the gamma lut step
+ * in hardware, which is the same as a linear lut that
+ * DRM expects us to use in absence of a user lut.
+ */
+ dispbkgndx &= ~SCALER_DISPBKGND_GAMMA;
+ }
+ HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel), dispbkgndx);
+ }
+
+ if (debug_dump_regs) {
+ DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
+ vc4_hvs_dump_state(dev);
+ }
+}
+
+static int vc4_enable_vblank(struct drm_crtc *crtc)
+{
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+
+ CRTC_WRITE(PV_INTEN, PV_INT_VFP_START);
+
+ return 0;
+}
+
+static void vc4_disable_vblank(struct drm_crtc *crtc)
+{
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+
+ CRTC_WRITE(PV_INTEN, 0);
+}
+
+static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
+{
+ struct drm_crtc *crtc = &vc4_crtc->base;
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+ u32 chan = vc4_crtc->channel;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ if (vc4_crtc->event &&
+ (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)) ||
+ vc4_state->feed_txp)) {
+ drm_crtc_send_vblank_event(crtc, vc4_crtc->event);
+ vc4_crtc->event = NULL;
+ drm_crtc_vblank_put(crtc);
+ }
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+}
+
+void vc4_crtc_handle_vblank(struct vc4_crtc *crtc)
+{
+ crtc->t_vblank = ktime_get();
+ drm_crtc_handle_vblank(&crtc->base);
+ vc4_crtc_handle_page_flip(crtc);
+}
+
+static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
+{
+ struct vc4_crtc *vc4_crtc = data;
+ u32 stat = CRTC_READ(PV_INTSTAT);
+ irqreturn_t ret = IRQ_NONE;
+
+ if (stat & PV_INT_VFP_START) {
+ CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
+ vc4_crtc_handle_vblank(vc4_crtc);
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+struct vc4_async_flip_state {
+ struct drm_crtc *crtc;
+ struct drm_framebuffer *fb;
+ struct drm_framebuffer *old_fb;
+ struct drm_pending_vblank_event *event;
+
+ struct vc4_seqno_cb cb;
+};
+
+/* Called when the V3D execution for the BO being flipped to is done, so that
+ * we can actually update the plane's address to point to it.
+ */
+static void
+vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
+{
+ struct vc4_async_flip_state *flip_state =
+ container_of(cb, struct vc4_async_flip_state, cb);
+ struct drm_crtc *crtc = flip_state->crtc;
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_plane *plane = crtc->primary;
+
+ vc4_plane_async_set_fb(plane, flip_state->fb);
+ if (flip_state->event) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ drm_crtc_send_vblank_event(crtc, flip_state->event);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+
+ drm_crtc_vblank_put(crtc);
+ drm_framebuffer_put(flip_state->fb);
+
+ /* Decrement the BO usecnt in order to keep the inc/dec calls balanced
+ * when the planes are updated through the async update path.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made cleanup_fb()
+ * logic.
+ */
+ if (flip_state->old_fb) {
+ struct drm_gem_cma_object *cma_bo;
+ struct vc4_bo *bo;
+
+ cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0);
+ bo = to_vc4_bo(&cma_bo->base);
+ vc4_bo_dec_usecnt(bo);
+ drm_framebuffer_put(flip_state->old_fb);
+ }
+
+ kfree(flip_state);
+
+ up(&vc4->async_modeset);
+}
+
+/* Implements async (non-vblank-synced) page flips.
+ *
+ * The page flip ioctl needs to return immediately, so we grab the
+ * modeset semaphore on the pipe, and queue the address update for
+ * when V3D is done with the BO being flipped to.
+ */
+static int vc4_async_page_flip(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t flags)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_plane *plane = crtc->primary;
+ int ret = 0;
+ struct vc4_async_flip_state *flip_state;
+ struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
+ struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
+ /* Increment the BO usecnt here, so that we never end up with an
+ * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the
+ * plane is later updated through the non-async path.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made prepare_fb()
+ * logic.
+ */
+ ret = vc4_bo_inc_usecnt(bo);
+ if (ret)
+ return ret;
+
+ flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
+ if (!flip_state) {
+ vc4_bo_dec_usecnt(bo);
+ return -ENOMEM;
+ }
+
+ drm_framebuffer_get(fb);
+ flip_state->fb = fb;
+ flip_state->crtc = crtc;
+ flip_state->event = event;
+
+ /* Make sure all other async modesets have landed. */
+ ret = down_interruptible(&vc4->async_modeset);
+ if (ret) {
+ drm_framebuffer_put(fb);
+ vc4_bo_dec_usecnt(bo);
+ kfree(flip_state);
+ return ret;
+ }
+
+ /* Save the current FB before it's replaced by the new one in
+ * drm_atomic_set_fb_for_plane(). We'll need the old FB in
+ * vc4_async_page_flip_complete() to decrement the BO usecnt and keep
+ * it consistent.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made cleanup_fb()
+ * logic.
+ */
+ flip_state->old_fb = plane->state->fb;
+ if (flip_state->old_fb)
+ drm_framebuffer_get(flip_state->old_fb);
+
+ WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+ /* Immediately update the plane's legacy fb pointer, so that later
+ * modeset prep sees the state that will be present when the semaphore
+ * is released.
+ */
+ drm_atomic_set_fb_for_plane(plane->state, fb);
+
+ vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
+ vc4_async_page_flip_complete);
+
+ /* Driver takes ownership of state on successful async commit. */
+ return 0;
+}
+
+static int vc4_page_flip(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t flags,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
+ return vc4_async_page_flip(crtc, fb, event, flags);
+ else
+ return drm_atomic_helper_page_flip(crtc, fb, event, flags, ctx);
+}
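
For reference, userspace reaches vc4_async_page_flip() through the legacy page-flip ioctl with the ASYNC flag set. A minimal sketch using libdrm's drmModePageFlip(); fd, crtc_id and fb_id are assumed to come from earlier modeset setup and are not part of this patch:

#include <xf86drm.h>
#include <xf86drmMode.h>

/* Sketch only: fd, crtc_id and fb_id are assumed to be set up elsewhere. */
static int queue_async_flip(int fd, uint32_t crtc_id, uint32_t fb_id)
{
	/* ASYNC requests the non-vblank-synced path above; EVENT asks for a
	 * completion event on the DRM fd, which the driver delivers via
	 * drm_crtc_send_vblank_event().
	 */
	return drmModePageFlip(fd, crtc_id, fb_id,
			       DRM_MODE_PAGE_FLIP_ASYNC |
			       DRM_MODE_PAGE_FLIP_EVENT,
			       NULL);
}
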
+
+static struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc)
+{
+ struct vc4_crtc_state *vc4_state, *old_vc4_state;
+
+ vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
+ if (!vc4_state)
+ return NULL;
+
+ old_vc4_state = to_vc4_crtc_state(crtc->state);
+ vc4_state->feed_txp = old_vc4_state->feed_txp;
+
+ __drm_atomic_helper_crtc_duplicate_state(crtc, &vc4_state->base);
+ return &vc4_state->base;
+}
+
+static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
+ struct drm_crtc_state *state)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
+ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
+
+ if (vc4_state->mm.allocated) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
+ drm_mm_remove_node(&vc4_state->mm);
+ spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
+
+ }
+
+ drm_atomic_helper_crtc_destroy_state(crtc, state);
+}
+
+static void
+vc4_crtc_reset(struct drm_crtc *crtc)
+{
+ if (crtc->state)
+ vc4_crtc_destroy_state(crtc, crtc->state);
+
+ crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
+ if (crtc->state)
+ crtc->state->crtc = crtc;
+}
+
+static const struct drm_crtc_funcs vc4_crtc_funcs = {
+ .set_config = drm_atomic_helper_set_config,
+ .destroy = vc4_crtc_destroy,
+ .page_flip = vc4_page_flip,
+ .set_property = NULL,
+ .cursor_set = NULL, /* handled by drm_mode_cursor_universal */
+ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */
+ .reset = vc4_crtc_reset,
+ .atomic_duplicate_state = vc4_crtc_duplicate_state,
+ .atomic_destroy_state = vc4_crtc_destroy_state,
+ .gamma_set = drm_atomic_helper_legacy_gamma_set,
+ .enable_vblank = vc4_enable_vblank,
+ .disable_vblank = vc4_disable_vblank,
+};
+
+static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = {
+ .mode_set_nofb = vc4_crtc_mode_set_nofb,
+ .mode_valid = vc4_crtc_mode_valid,
+ .atomic_check = vc4_crtc_atomic_check,
+ .atomic_flush = vc4_crtc_atomic_flush,
+ .atomic_enable = vc4_crtc_atomic_enable,
+ .atomic_disable = vc4_crtc_atomic_disable,
+};
+
+static const struct vc4_crtc_data pv0_data = {
+ .hvs_channel = 0,
+ .encoder_types = {
+ [PV_CONTROL_CLK_SELECT_DSI] = VC4_ENCODER_TYPE_DSI0,
+ [PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_DPI,
+ },
+};
+
+static const struct vc4_crtc_data pv1_data = {
+ .hvs_channel = 2,
+ .encoder_types = {
+ [PV_CONTROL_CLK_SELECT_DSI] = VC4_ENCODER_TYPE_DSI1,
+ [PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_SMI,
+ },
+};
+
+static const struct vc4_crtc_data pv2_data = {
+ .hvs_channel = 1,
+ .encoder_types = {
+ [PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI] = VC4_ENCODER_TYPE_HDMI,
+ [PV_CONTROL_CLK_SELECT_VEC] = VC4_ENCODER_TYPE_VEC,
+ },
+};
+
+static const struct of_device_id vc4_crtc_dt_match[] = {
+ { .compatible = "brcm,bcm2835-pixelvalve0", .data = &pv0_data },
+ { .compatible = "brcm,bcm2835-pixelvalve1", .data = &pv1_data },
+ { .compatible = "brcm,bcm2835-pixelvalve2", .data = &pv2_data },
+ {}
+};
+
+static void vc4_set_crtc_possible_masks(struct drm_device *drm,
+ struct drm_crtc *crtc)
+{
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+ const struct vc4_crtc_data *crtc_data = vc4_crtc->data;
+ const enum vc4_encoder_type *encoder_types = crtc_data->encoder_types;
+ struct drm_encoder *encoder;
+
+ drm_for_each_encoder(encoder, drm) {
+ struct vc4_encoder *vc4_encoder;
+ int i;
+
+ /* HVS FIFO2 can feed the TXP IP. */
+ if (crtc_data->hvs_channel == 2 &&
+ encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL) {
+ encoder->possible_crtcs |= drm_crtc_mask(crtc);
+ continue;
+ }
+
+ vc4_encoder = to_vc4_encoder(encoder);
+ for (i = 0; i < ARRAY_SIZE(crtc_data->encoder_types); i++) {
+ if (vc4_encoder->type == encoder_types[i]) {
+ vc4_encoder->clock_select = i;
+ encoder->possible_crtcs |= drm_crtc_mask(crtc);
+ break;
+ }
+ }
+ }
+}
+
+static void
+vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc)
+{
+ struct drm_device *drm = vc4_crtc->base.dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel));
+ /* Top/base are supposed to be 4-pixel aligned, but the
+ * Raspberry Pi firmware fills the low bits (which are
+ * presumably ignored).
+ */
+ u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
+ u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
+
+ vc4_crtc->cob_size = top - base + 4;
+}
+
+static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_crtc *vc4_crtc;
+ struct drm_crtc *crtc;
+ struct drm_plane *primary_plane, *cursor_plane, *destroy_plane, *temp;
+ const struct of_device_id *match;
+ int ret, i;
+
+ vc4_crtc = devm_kzalloc(dev, sizeof(*vc4_crtc), GFP_KERNEL);
+ if (!vc4_crtc)
+ return -ENOMEM;
+ crtc = &vc4_crtc->base;
+
+ match = of_match_device(vc4_crtc_dt_match, dev);
+ if (!match)
+ return -ENODEV;
+ vc4_crtc->data = match->data;
+
+ vc4_crtc->regs = vc4_ioremap_regs(pdev, 0);
+ if (IS_ERR(vc4_crtc->regs))
+ return PTR_ERR(vc4_crtc->regs);
+
+ /* For now, we create just the primary and the legacy cursor
+ * planes. We should be able to stack more planes on easily,
+ * but to do that we would need to compute the bandwidth
+ * requirement of the plane configuration, and reject ones
+ * that will take too much.
+ */
+ primary_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_PRIMARY);
+ if (IS_ERR(primary_plane)) {
+ dev_err(dev, "failed to construct primary plane\n");
+ ret = PTR_ERR(primary_plane);
+ goto err;
+ }
+
+ drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL,
+ &vc4_crtc_funcs, NULL);
+ drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs);
+ vc4_crtc->channel = vc4_crtc->data->hvs_channel;
+ drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r));
+ drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
+
+ /* We support CTM, but only for one CRTC at a time. It's therefore
+ * implemented as private driver state in vc4_kms, not here.
+ */
+ drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size);
+
+ /* Set up some arbitrary number of planes. We're not limited
+ * by a set number of physical registers, just the space in
+ * the HVS (16k) and how small a plane can be (28 bytes).
+ * However, each plane we set up takes up some memory, and
+ * increases the cost of looping over planes, which atomic
+ * modesetting does quite a bit. As a result, we pick a
+ * modest number of planes to expose that should hopefully
+ * still cover any sane usecase.
+ */
+ for (i = 0; i < 8; i++) {
+ struct drm_plane *plane =
+ vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);
+
+ if (IS_ERR(plane))
+ continue;
+
+ plane->possible_crtcs = drm_crtc_mask(crtc);
+ }
+
+ /* Set up the legacy cursor after overlay initialization,
+ * since we overlay planes on the CRTC in the order they were
+ * initialized.
+ */
+ cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
+ if (!IS_ERR(cursor_plane)) {
+ cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
+ crtc->cursor = cursor_plane;
+ }
+
+ vc4_crtc_get_cob_allocation(vc4_crtc);
+
+ CRTC_WRITE(PV_INTEN, 0);
+ CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
+ ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+ vc4_crtc_irq_handler, 0, "vc4 crtc", vc4_crtc);
+ if (ret)
+ goto err_destroy_planes;
+
+ vc4_set_crtc_possible_masks(drm, crtc);
+
+ for (i = 0; i < crtc->gamma_size; i++) {
+ vc4_crtc->lut_r[i] = i;
+ vc4_crtc->lut_g[i] = i;
+ vc4_crtc->lut_b[i] = i;
+ }
+
+ platform_set_drvdata(pdev, vc4_crtc);
+
+ return 0;
+
+err_destroy_planes:
+ list_for_each_entry_safe(destroy_plane, temp,
+ &drm->mode_config.plane_list, head) {
+ if (destroy_plane->possible_crtcs == drm_crtc_mask(crtc))
+ destroy_plane->funcs->destroy(destroy_plane);
+ }
+err:
+ return ret;
+}
+
+static void vc4_crtc_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct vc4_crtc *vc4_crtc = dev_get_drvdata(dev);
+
+ vc4_crtc_destroy(&vc4_crtc->base);
+
+ CRTC_WRITE(PV_INTEN, 0);
+
+ platform_set_drvdata(pdev, NULL);
+}
+
+static const struct component_ops vc4_crtc_ops = {
+ .bind = vc4_crtc_bind,
+ .unbind = vc4_crtc_unbind,
+};
+
+static int vc4_crtc_dev_probe(struct platform_device *pdev)
+{
+ return component_add(&pdev->dev, &vc4_crtc_ops);
+}
+
+static int vc4_crtc_dev_remove(struct platform_device *pdev)
+{
+ component_del(&pdev->dev, &vc4_crtc_ops);
+ return 0;
+}
+
+struct platform_driver vc4_crtc_driver = {
+ .probe = vc4_crtc_dev_probe,
+ .remove = vc4_crtc_dev_remove,
+ .driver = {
+ .name = "vc4_crtc",
+ .of_match_table = vc4_crtc_dt_match,
+ },
+};
diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c
new file mode 100644
index 000000000..7a0003de7
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/circ_buf.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <drm/drmP.h>
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+static const struct drm_info_list vc4_debugfs_list[] = {
+ {"bo_stats", vc4_bo_stats_debugfs, 0},
+ {"dpi_regs", vc4_dpi_debugfs_regs, 0},
+ {"dsi1_regs", vc4_dsi_debugfs_regs, 0, (void *)(uintptr_t)1},
+ {"hdmi_regs", vc4_hdmi_debugfs_regs, 0},
+ {"vec_regs", vc4_vec_debugfs_regs, 0},
+ {"txp_regs", vc4_txp_debugfs_regs, 0},
+ {"hvs_regs", vc4_hvs_debugfs_regs, 0},
+ {"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0},
+ {"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1},
+ {"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2},
+ {"v3d_ident", vc4_v3d_debugfs_ident, 0},
+ {"v3d_regs", vc4_v3d_debugfs_regs, 0},
+};
+
+#define VC4_DEBUGFS_ENTRIES ARRAY_SIZE(vc4_debugfs_list)
+
+int
+vc4_debugfs_init(struct drm_minor *minor)
+{
+ return drm_debugfs_create_files(vc4_debugfs_list, VC4_DEBUGFS_ENTRIES,
+ minor->debugfs_root, minor);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
new file mode 100644
index 000000000..f18581297
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2016 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * DOC: VC4 DPI module
+ *
+ * The VC4 DPI hardware supports MIPI DPI type 4 and Nokia ViSSI
+ * signals. On BCM2835, these can be routed out to GPIO0-27 with the
+ * ALT2 function.
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/of_graph.h>
+#include <linux/of_platform.h>
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define DPI_C 0x00
+# define DPI_OUTPUT_ENABLE_MODE BIT(16)
+
+/* The order field takes the incoming 24 bit RGB from the pixel valve
+ * and shuffles the 3 channels.
+ */
+# define DPI_ORDER_MASK VC4_MASK(15, 14)
+# define DPI_ORDER_SHIFT 14
+# define DPI_ORDER_RGB 0
+# define DPI_ORDER_BGR 1
+# define DPI_ORDER_GRB 2
+# define DPI_ORDER_BRG 3
+
+/* The format field takes the ORDER-shuffled pixel valve data and
+ * formats it onto the output lines.
+ */
+# define DPI_FORMAT_MASK VC4_MASK(13, 11)
+# define DPI_FORMAT_SHIFT 11
+/* This define is named in the hardware, but actually just outputs 0. */
+# define DPI_FORMAT_9BIT_666_RGB 0
+/* Outputs 00000000rrrrrggggggbbbbb */
+# define DPI_FORMAT_16BIT_565_RGB_1 1
+/* Outputs 000rrrrr00gggggg000bbbbb */
+# define DPI_FORMAT_16BIT_565_RGB_2 2
+/* Outputs 00rrrrr000gggggg00bbbbb0 */
+# define DPI_FORMAT_16BIT_565_RGB_3 3
+/* Outputs 000000rrrrrrggggggbbbbbb */
+# define DPI_FORMAT_18BIT_666_RGB_1 4
+/* Outputs 00rrrrrr00gggggg00bbbbbb */
+# define DPI_FORMAT_18BIT_666_RGB_2 5
+/* Outputs rrrrrrrrggggggggbbbbbbbb */
+# define DPI_FORMAT_24BIT_888_RGB 6
+
+/* Reverses the polarity of the corresponding signal */
+# define DPI_PIXEL_CLK_INVERT BIT(10)
+# define DPI_HSYNC_INVERT BIT(9)
+# define DPI_VSYNC_INVERT BIT(8)
+# define DPI_OUTPUT_ENABLE_INVERT BIT(7)
+
+/* Outputs the signal on the falling clock edge instead of the rising one. */
+# define DPI_HSYNC_NEGATE BIT(6)
+# define DPI_VSYNC_NEGATE BIT(5)
+# define DPI_OUTPUT_ENABLE_NEGATE BIT(4)
+
+/* Disables the signal */
+# define DPI_HSYNC_DISABLE BIT(3)
+# define DPI_VSYNC_DISABLE BIT(2)
+# define DPI_OUTPUT_ENABLE_DISABLE BIT(1)
+
+/* Power gate to the device, full reset at 0 -> 1 transition */
+# define DPI_ENABLE BIT(0)
+
+/* All other registers besides DPI_C return the ID */
+#define DPI_ID 0x04
+# define DPI_ID_VALUE 0x00647069
+
+/* General DPI hardware state. */
+struct vc4_dpi {
+ struct platform_device *pdev;
+
+ struct drm_encoder *encoder;
+
+ void __iomem *regs;
+
+ struct clk *pixel_clock;
+ struct clk *core_clock;
+};
+
+#define DPI_READ(offset) readl(dpi->regs + (offset))
+#define DPI_WRITE(offset, val) writel(val, dpi->regs + (offset))
+
+/* VC4 DPI encoder KMS struct */
+struct vc4_dpi_encoder {
+ struct vc4_encoder base;
+ struct vc4_dpi *dpi;
+};
+
+static inline struct vc4_dpi_encoder *
+to_vc4_dpi_encoder(struct drm_encoder *encoder)
+{
+ return container_of(encoder, struct vc4_dpi_encoder, base.base);
+}
+
+#define DPI_REG(reg) { reg, #reg }
+static const struct {
+ u32 reg;
+ const char *name;
+} dpi_regs[] = {
+ DPI_REG(DPI_C),
+ DPI_REG(DPI_ID),
+};
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_dpi *dpi = vc4->dpi;
+ int i;
+
+ if (!dpi)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(dpi_regs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ dpi_regs[i].name, dpi_regs[i].reg,
+ DPI_READ(dpi_regs[i].reg));
+ }
+
+ return 0;
+}
+#endif
+
+static const struct drm_encoder_funcs vc4_dpi_encoder_funcs = {
+ .destroy = drm_encoder_cleanup,
+};
+
+static void vc4_dpi_encoder_disable(struct drm_encoder *encoder)
+{
+ struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder);
+ struct vc4_dpi *dpi = vc4_encoder->dpi;
+
+ clk_disable_unprepare(dpi->pixel_clock);
+}
+
+static void vc4_dpi_encoder_enable(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct drm_display_mode *mode = &encoder->crtc->mode;
+ struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder);
+ struct vc4_dpi *dpi = vc4_encoder->dpi;
+ struct drm_connector_list_iter conn_iter;
+ struct drm_connector *connector = NULL, *connector_scan;
+ u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE;
+ int ret;
+
+ /* Look up the connector attached to DPI so we can get the
+ * bus_format. Ideally the bridge would tell us the
+ * bus_format we want, but it doesn't yet, so assume that it's
+ * uniform throughout the bridge chain.
+ */
+ drm_connector_list_iter_begin(dev, &conn_iter);
+ drm_for_each_connector_iter(connector_scan, &conn_iter) {
+ if (connector_scan->encoder == encoder) {
+ connector = connector_scan;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&conn_iter);
+
+ if (connector && connector->display_info.num_bus_formats) {
+ u32 bus_format = connector->display_info.bus_formats[0];
+
+ switch (bus_format) {
+ case MEDIA_BUS_FMT_RGB888_1X24:
+ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB,
+ DPI_FORMAT);
+ break;
+ case MEDIA_BUS_FMT_BGR888_1X24:
+ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB,
+ DPI_FORMAT);
+ dpi_c |= VC4_SET_FIELD(DPI_ORDER_BGR, DPI_ORDER);
+ break;
+ case MEDIA_BUS_FMT_RGB666_1X24_CPADHI:
+ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_2,
+ DPI_FORMAT);
+ break;
+ case MEDIA_BUS_FMT_RGB666_1X18:
+ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_18BIT_666_RGB_1,
+ DPI_FORMAT);
+ break;
+ case MEDIA_BUS_FMT_RGB565_1X16:
+ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_16BIT_565_RGB_3,
+ DPI_FORMAT);
+ break;
+ default:
+ DRM_ERROR("Unknown media bus format %d\n", bus_format);
+ break;
+ }
+ } else {
+ /* Default to 24bit if no connector found. */
+ dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT);
+ }
+
+ if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+ dpi_c |= DPI_HSYNC_INVERT;
+ else if (!(mode->flags & DRM_MODE_FLAG_PHSYNC))
+ dpi_c |= DPI_HSYNC_DISABLE;
+
+ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+ dpi_c |= DPI_VSYNC_INVERT;
+ else if (!(mode->flags & DRM_MODE_FLAG_PVSYNC))
+ dpi_c |= DPI_VSYNC_DISABLE;
+
+ DPI_WRITE(DPI_C, dpi_c);
+
+ ret = clk_set_rate(dpi->pixel_clock, mode->clock * 1000);
+ if (ret)
+ DRM_ERROR("Failed to set clock rate: %d\n", ret);
+
+ ret = clk_prepare_enable(dpi->pixel_clock);
+ if (ret)
+ DRM_ERROR("Failed to set clock rate: %d\n", ret);
+}
+
+static enum drm_mode_status vc4_dpi_encoder_mode_valid(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode)
+{
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ return MODE_NO_INTERLACE;
+
+ return MODE_OK;
+}
+
+static const struct drm_encoder_helper_funcs vc4_dpi_encoder_helper_funcs = {
+ .disable = vc4_dpi_encoder_disable,
+ .enable = vc4_dpi_encoder_enable,
+ .mode_valid = vc4_dpi_encoder_mode_valid,
+};
+
+static const struct of_device_id vc4_dpi_dt_match[] = {
+ { .compatible = "brcm,bcm2835-dpi", .data = NULL },
+ {}
+};
+
+/* Sets up the next link in the display chain, whether it's a panel or
+ * a bridge.
+ */
+static int vc4_dpi_init_bridge(struct vc4_dpi *dpi)
+{
+ struct device *dev = &dpi->pdev->dev;
+ struct drm_panel *panel;
+ struct drm_bridge *bridge;
+ int ret;
+
+ ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
+ &panel, &bridge);
+ if (ret) {
+ /* If nothing was connected in the DT, that's not an
+ * error.
+ */
+ if (ret == -ENODEV)
+ return 0;
+ else
+ return ret;
+ }
+
+ if (panel)
+ bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DPI);
+
+ return drm_bridge_attach(dpi->encoder, bridge, NULL);
+}
+
+static int vc4_dpi_bind(struct device *dev, struct device *master, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_dpi *dpi;
+ struct vc4_dpi_encoder *vc4_dpi_encoder;
+ int ret;
+
+ dpi = devm_kzalloc(dev, sizeof(*dpi), GFP_KERNEL);
+ if (!dpi)
+ return -ENOMEM;
+
+ vc4_dpi_encoder = devm_kzalloc(dev, sizeof(*vc4_dpi_encoder),
+ GFP_KERNEL);
+ if (!vc4_dpi_encoder)
+ return -ENOMEM;
+ vc4_dpi_encoder->base.type = VC4_ENCODER_TYPE_DPI;
+ vc4_dpi_encoder->dpi = dpi;
+ dpi->encoder = &vc4_dpi_encoder->base.base;
+
+ dpi->pdev = pdev;
+ dpi->regs = vc4_ioremap_regs(pdev, 0);
+ if (IS_ERR(dpi->regs))
+ return PTR_ERR(dpi->regs);
+
+ if (DPI_READ(DPI_ID) != DPI_ID_VALUE) {
+ dev_err(dev, "Port returned 0x%08x for ID instead of 0x%08x\n",
+ DPI_READ(DPI_ID), DPI_ID_VALUE);
+ return -ENODEV;
+ }
+
+ dpi->core_clock = devm_clk_get(dev, "core");
+ if (IS_ERR(dpi->core_clock)) {
+ ret = PTR_ERR(dpi->core_clock);
+ if (ret != -EPROBE_DEFER)
+ DRM_ERROR("Failed to get core clock: %d\n", ret);
+ return ret;
+ }
+ dpi->pixel_clock = devm_clk_get(dev, "pixel");
+ if (IS_ERR(dpi->pixel_clock)) {
+ ret = PTR_ERR(dpi->pixel_clock);
+ if (ret != -EPROBE_DEFER)
+ DRM_ERROR("Failed to get pixel clock: %d\n", ret);
+ return ret;
+ }
+
+ ret = clk_prepare_enable(dpi->core_clock);
+ if (ret)
+ DRM_ERROR("Failed to turn on core clock: %d\n", ret);
+
+ drm_encoder_init(drm, dpi->encoder, &vc4_dpi_encoder_funcs,
+ DRM_MODE_ENCODER_DPI, NULL);
+ drm_encoder_helper_add(dpi->encoder, &vc4_dpi_encoder_helper_funcs);
+
+ ret = vc4_dpi_init_bridge(dpi);
+ if (ret)
+ goto err_destroy_encoder;
+
+ dev_set_drvdata(dev, dpi);
+
+ vc4->dpi = dpi;
+
+ return 0;
+
+err_destroy_encoder:
+ drm_encoder_cleanup(dpi->encoder);
+ clk_disable_unprepare(dpi->core_clock);
+ return ret;
+}
+
+static void vc4_dpi_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_dpi *dpi = dev_get_drvdata(dev);
+
+ drm_of_panel_bridge_remove(dev->of_node, 0, 0);
+
+ drm_encoder_cleanup(dpi->encoder);
+
+ clk_disable_unprepare(dpi->core_clock);
+
+ vc4->dpi = NULL;
+}
+
+static const struct component_ops vc4_dpi_ops = {
+ .bind = vc4_dpi_bind,
+ .unbind = vc4_dpi_unbind,
+};
+
+static int vc4_dpi_dev_probe(struct platform_device *pdev)
+{
+ return component_add(&pdev->dev, &vc4_dpi_ops);
+}
+
+static int vc4_dpi_dev_remove(struct platform_device *pdev)
+{
+ component_del(&pdev->dev, &vc4_dpi_ops);
+ return 0;
+}
+
+struct platform_driver vc4_dpi_driver = {
+ .probe = vc4_dpi_dev_probe,
+ .remove = vc4_dpi_dev_remove,
+ .driver = {
+ .name = "vc4_dpi",
+ .of_match_table = vc4_dpi_dt_match,
+ },
+};
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
new file mode 100644
index 000000000..868dd1ef3
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2014-2015 Broadcom
+ * Copyright (C) 2013 Red Hat
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**
+ * DOC: Broadcom VC4 Graphics Driver
+ *
+ * The Broadcom VideoCore 4 (present in the Raspberry Pi) contains an
+ * OpenGL ES 2.0-compatible 3D engine called V3D, and a highly
+ * configurable display output pipeline that supports HDMI, DSI, DPI,
+ * and Composite TV output.
+ *
+ * The 3D engine also has an interface for submitting arbitrary
+ * compute shader-style jobs using the same shader processor as is
+ * used for vertex and fragment shaders in GLES 2.0. However, given
+ * that the hardware isn't able to expose any standard interfaces like
+ * OpenGL compute shaders or OpenCL, it isn't supported by this
+ * driver.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define DRIVER_NAME "vc4"
+#define DRIVER_DESC "Broadcom VC4 graphics"
+#define DRIVER_DATE "20140616"
+#define DRIVER_MAJOR 0
+#define DRIVER_MINOR 0
+#define DRIVER_PATCHLEVEL 0
+
+/* Helper function for mapping the regs on a platform device. */
+void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index)
+{
+ struct resource *res;
+ void __iomem *map;
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, index);
+ map = devm_ioremap_resource(&dev->dev, res);
+ if (IS_ERR(map)) {
+ DRM_ERROR("Failed to map registers: %ld\n", PTR_ERR(map));
+ return map;
+ }
+
+ return map;
+}
+
+static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_vc4_get_param *args = data;
+ int ret;
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ switch (args->param) {
+ case DRM_VC4_PARAM_V3D_IDENT0:
+ ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+ if (ret < 0)
+ return ret;
+ args->value = V3D_READ(V3D_IDENT0);
+ pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
+ pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
+ break;
+ case DRM_VC4_PARAM_V3D_IDENT1:
+ ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+ if (ret < 0)
+ return ret;
+ args->value = V3D_READ(V3D_IDENT1);
+ pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
+ pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
+ break;
+ case DRM_VC4_PARAM_V3D_IDENT2:
+ ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+ if (ret < 0)
+ return ret;
+ args->value = V3D_READ(V3D_IDENT2);
+ pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
+ pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
+ break;
+ case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
+ case DRM_VC4_PARAM_SUPPORTS_ETC1:
+ case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
+ case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
+ case DRM_VC4_PARAM_SUPPORTS_MADVISE:
+ case DRM_VC4_PARAM_SUPPORTS_PERFMON:
+ args->value = true;
+ break;
+ default:
+ DRM_DEBUG("Unknown parameter %d\n", args->param);
+ return -EINVAL;
+ }
+
+ return 0;
+}
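
From userspace, these parameters are queried with the VC4_GET_PARAM ioctl. A minimal sketch, assuming libdrm's drmIoctl() and the vc4 uapi header (the header's install path may vary per distribution):

#include <stdio.h>
#include <xf86drm.h>
#include <drm/vc4_drm.h>	/* or libdrm's copy of vc4_drm.h */

static void print_v3d_ident0(int fd)
{
	struct drm_vc4_get_param gp = {
		.param = DRM_VC4_PARAM_V3D_IDENT0,
		/* .pad must stay zero or the handler above returns -EINVAL */
	};

	if (drmIoctl(fd, DRM_IOCTL_VC4_GET_PARAM, &gp) == 0)
		printf("V3D_IDENT0: 0x%llx\n", (unsigned long long)gp.value);
}
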
+
+static int vc4_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct vc4_file *vc4file;
+
+ vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL);
+ if (!vc4file)
+ return -ENOMEM;
+
+ vc4_perfmon_open_file(vc4file);
+ file->driver_priv = vc4file;
+ return 0;
+}
+
+static void vc4_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct vc4_file *vc4file = file->driver_priv;
+
+ vc4_perfmon_close_file(vc4file);
+ kfree(vc4file);
+}
+
+static const struct vm_operations_struct vc4_vm_ops = {
+ .fault = vc4_fault,
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+};
+
+static const struct file_operations vc4_drm_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = vc4_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = drm_compat_ioctl,
+ .llseek = noop_llseek,
+};
+
+static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
+ DRM_ROOT_ONLY),
+ DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_PERFMON_CREATE, vc4_perfmon_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_PERFMON_DESTROY, vc4_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
+};
+
+static struct drm_driver vc4_drm_driver = {
+ .driver_features = (DRIVER_MODESET |
+ DRIVER_ATOMIC |
+ DRIVER_GEM |
+ DRIVER_HAVE_IRQ |
+ DRIVER_RENDER |
+ DRIVER_PRIME |
+ DRIVER_SYNCOBJ),
+ .lastclose = drm_fb_helper_lastclose,
+ .open = vc4_open,
+ .postclose = vc4_close,
+ .irq_handler = vc4_irq,
+ .irq_preinstall = vc4_irq_preinstall,
+ .irq_postinstall = vc4_irq_postinstall,
+ .irq_uninstall = vc4_irq_uninstall,
+
+ .get_scanout_position = vc4_crtc_get_scanoutpos,
+ .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
+
+#if defined(CONFIG_DEBUG_FS)
+ .debugfs_init = vc4_debugfs_init,
+#endif
+
+ .gem_create_object = vc4_create_object,
+ .gem_free_object_unlocked = vc4_free_object,
+ .gem_vm_ops = &vc4_vm_ops,
+
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_import = drm_gem_prime_import,
+ .gem_prime_export = vc4_prime_export,
+ .gem_prime_res_obj = vc4_prime_res_obj,
+ .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
+ .gem_prime_import_sg_table = vc4_prime_import_sg_table,
+ .gem_prime_vmap = vc4_prime_vmap,
+ .gem_prime_vunmap = drm_gem_cma_prime_vunmap,
+ .gem_prime_mmap = vc4_prime_mmap,
+
+ .dumb_create = vc4_dumb_create,
+
+ .ioctls = vc4_drm_ioctls,
+ .num_ioctls = ARRAY_SIZE(vc4_drm_ioctls),
+ .fops = &vc4_drm_fops,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static int compare_dev(struct device *dev, void *data)
+{
+ return dev == data;
+}
+
+static void vc4_match_add_drivers(struct device *dev,
+ struct component_match **match,
+ struct platform_driver *const *drivers,
+ int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+ struct device_driver *drv = &drivers[i]->driver;
+ struct device *p = NULL, *d;
+
+ while ((d = bus_find_device(&platform_bus_type, p, drv,
+ (void *)platform_bus_type.match))) {
+ put_device(p);
+ component_match_add(dev, match, compare_dev, d);
+ p = d;
+ }
+ put_device(p);
+ }
+}
+
+static void vc4_kick_out_firmware_fb(void)
+{
+ struct apertures_struct *ap;
+
+ ap = alloc_apertures(1);
+ if (!ap)
+ return;
+
+ /* Since VC4 is a UMA device, the simplefb node may have been
+ * located anywhere in memory.
+ */
+ ap->ranges[0].base = 0;
+ ap->ranges[0].size = ~0;
+
+ drm_fb_helper_remove_conflicting_framebuffers(ap, "vc4drmfb", false);
+ kfree(ap);
+}
+
+static int vc4_drm_bind(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm;
+ struct vc4_dev *vc4;
+ int ret = 0;
+
+ dev->coherent_dma_mask = DMA_BIT_MASK(32);
+
+ vc4 = devm_kzalloc(dev, sizeof(*vc4), GFP_KERNEL);
+ if (!vc4)
+ return -ENOMEM;
+
+ drm = drm_dev_alloc(&vc4_drm_driver, dev);
+ if (IS_ERR(drm))
+ return PTR_ERR(drm);
+ platform_set_drvdata(pdev, drm);
+ vc4->dev = drm;
+ drm->dev_private = vc4;
+
+ ret = vc4_bo_cache_init(drm);
+ if (ret)
+ goto dev_put;
+
+ drm_mode_config_init(drm);
+
+ vc4_gem_init(drm);
+
+ ret = component_bind_all(dev, drm);
+ if (ret)
+ goto gem_destroy;
+
+ vc4_kick_out_firmware_fb();
+
+ ret = drm_dev_register(drm, 0);
+ if (ret < 0)
+ goto unbind_all;
+
+ vc4_kms_load(drm);
+
+ return 0;
+
+unbind_all:
+ component_unbind_all(dev, drm);
+gem_destroy:
+ vc4_gem_destroy(drm);
+ drm_mode_config_cleanup(drm);
+ vc4_bo_cache_destroy(drm);
+dev_put:
+ drm_dev_put(drm);
+ return ret;
+}
+
+static void vc4_drm_unbind(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+
+ drm_dev_unregister(drm);
+
+ drm_fb_cma_fbdev_fini(drm);
+
+ drm_mode_config_cleanup(drm);
+
+ drm_atomic_private_obj_fini(&vc4->ctm_manager);
+
+ drm_dev_put(drm);
+}
+
+static const struct component_master_ops vc4_drm_ops = {
+ .bind = vc4_drm_bind,
+ .unbind = vc4_drm_unbind,
+};
+
+static struct platform_driver *const component_drivers[] = {
+ &vc4_hdmi_driver,
+ &vc4_vec_driver,
+ &vc4_dpi_driver,
+ &vc4_dsi_driver,
+ &vc4_txp_driver,
+ &vc4_hvs_driver,
+ &vc4_crtc_driver,
+ &vc4_v3d_driver,
+};
+
+static int vc4_platform_drm_probe(struct platform_device *pdev)
+{
+ struct component_match *match = NULL;
+ struct device *dev = &pdev->dev;
+
+ vc4_match_add_drivers(dev, &match,
+ component_drivers, ARRAY_SIZE(component_drivers));
+
+ return component_master_add_with_match(dev, &vc4_drm_ops, match);
+}
+
+static int vc4_platform_drm_remove(struct platform_device *pdev)
+{
+ component_master_del(&pdev->dev, &vc4_drm_ops);
+
+ return 0;
+}
+
+static const struct of_device_id vc4_of_match[] = {
+ { .compatible = "brcm,bcm2835-vc4", },
+ { .compatible = "brcm,cygnus-vc4", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, vc4_of_match);
+
+static struct platform_driver vc4_platform_driver = {
+ .probe = vc4_platform_drm_probe,
+ .remove = vc4_platform_drm_remove,
+ .driver = {
+ .name = "vc4-drm",
+ .of_match_table = vc4_of_match,
+ },
+};
+
+static int __init vc4_drm_register(void)
+{
+ int ret;
+
+ ret = platform_register_drivers(component_drivers,
+ ARRAY_SIZE(component_drivers));
+ if (ret)
+ return ret;
+
+ return platform_driver_register(&vc4_platform_driver);
+}
+
+static void __exit vc4_drm_unregister(void)
+{
+ platform_unregister_drivers(component_drivers,
+ ARRAY_SIZE(component_drivers));
+ platform_driver_unregister(&vc4_platform_driver);
+}
+
+module_init(vc4_drm_register);
+module_exit(vc4_drm_unregister);
+
+MODULE_ALIAS("platform:vc4-drm");
+MODULE_DESCRIPTION("Broadcom VC4 DRM Driver");
+MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
new file mode 100644
index 000000000..bd6ef1f31
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -0,0 +1,822 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mm_types.h>
+#include <linux/reservation.h>
+#include <drm/drmP.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_syncobj.h>
+
+#include "uapi/drm/vc4_drm.h"
+
+/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
+ * this.
+ */
+enum vc4_kernel_bo_type {
+ /* Any kernel allocation (gem_create_object hook) before it
+ * gets another type set.
+ */
+ VC4_BO_TYPE_KERNEL,
+ VC4_BO_TYPE_V3D,
+ VC4_BO_TYPE_V3D_SHADER,
+ VC4_BO_TYPE_DUMB,
+ VC4_BO_TYPE_BIN,
+ VC4_BO_TYPE_RCL,
+ VC4_BO_TYPE_BCL,
+ VC4_BO_TYPE_KERNEL_CACHE,
+ VC4_BO_TYPE_COUNT
+};
+
+/* Performance monitor object. The perfmon lifetime is controlled by userspace
+ * using perfmon related ioctls. A perfmon can be attached to a submit_cl
+ * request, and when this is the case, HW perf counters will be activated just
+ * before the submit_cl is submitted to the GPU and disabled when the job is
+ * done. This way, only events related to a specific job will be counted.
+ */
+struct vc4_perfmon {
+ /* Tracks the number of users of the perfmon, when this counter reaches
+ * zero the perfmon is destroyed.
+ */
+ refcount_t refcnt;
+
+ /* Number of counters activated in this perfmon instance
+ * (should be less than DRM_VC4_MAX_PERF_COUNTERS).
+ */
+ u8 ncounters;
+
+ /* Events counted by the HW perf counters. */
+ u8 events[DRM_VC4_MAX_PERF_COUNTERS];
+
+ /* Storage for counter values. Counters are incremented by the HW
+ * perf counter values every time the perfmon is attached to a GPU job.
+ * This way, perfmon users don't have to retrieve the results after
+ * each job if they want to track events covering several submissions.
+ * Note that counter values can't be reset, but you can fake a reset by
+ * destroying the perfmon and creating a new one.
+ */
+ u64 counters[0];
+};
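
As the comment above says, the perfmon lifetime is driven from userspace through the perfmon ioctls. A hedged sketch of the create/destroy pairing; the event ID is a placeholder, and the uapi struct field names are recalled from the vc4 uapi header rather than quoted from this patch:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/vc4_drm.h>

/* Sketch: create a perfmon counting a single HW event, then destroy it.
 * In real use, create.id would be attached to a submit_cl request first.
 */
static int perfmon_roundtrip(int fd, uint8_t event_id)
{
	struct drm_vc4_perfmon_create create = {
		.ncounters = 1,
		.events = { event_id },	/* placeholder event ID */
	};
	struct drm_vc4_perfmon_destroy destroy;
	int ret;

	ret = drmIoctl(fd, DRM_IOCTL_VC4_PERFMON_CREATE, &create);
	if (ret)
		return ret;

	destroy.id = create.id;
	return drmIoctl(fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroy);
}
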
+
+struct vc4_dev {
+ struct drm_device *dev;
+
+ struct vc4_hdmi *hdmi;
+ struct vc4_hvs *hvs;
+ struct vc4_v3d *v3d;
+ struct vc4_dpi *dpi;
+ struct vc4_dsi *dsi1;
+ struct vc4_vec *vec;
+ struct vc4_txp *txp;
+
+ struct vc4_hang_state *hang_state;
+
+ /* The kernel-space BO cache. Tracks buffers that have been
+ * unreferenced by all other users (refcounts of 0!) but not
+ * yet freed, so we can do cheap allocations.
+ */
+ struct vc4_bo_cache {
+ /* Array of list heads for entries in the BO cache,
+ * based on number of pages, so we can do O(1) lookups
+ * in the cache when allocating.
+ */
+ struct list_head *size_list;
+ uint32_t size_list_size;
+
+ /* List of all BOs in the cache, ordered by age, so we
+ * can do O(1) lookups when trying to free old
+ * buffers.
+ */
+ struct list_head time_list;
+ struct work_struct time_work;
+ struct timer_list time_timer;
+ } bo_cache;
+
+ u32 num_labels;
+ struct vc4_label {
+ const char *name;
+ u32 num_allocated;
+ u32 size_allocated;
+ } *bo_labels;
+
+ /* Protects bo_cache and bo_labels. */
+ struct mutex bo_lock;
+
+ /* Purgeable BO pool. All BOs in this pool can have their memory
+ * reclaimed if the driver is unable to allocate new BOs. We also
+ * keep stats related to the purge mechanism here.
+ */
+ struct {
+ struct list_head list;
+ unsigned int num;
+ size_t size;
+ unsigned int purged_num;
+ size_t purged_size;
+ struct mutex lock;
+ } purgeable;
+
+ uint64_t dma_fence_context;
+
+ /* Sequence number for the last job queued in bin_job_list.
+ * Starts at 0 (no jobs emitted).
+ */
+ uint64_t emit_seqno;
+
+ /* Sequence number for the last completed job on the GPU.
+ * Starts at 0 (no jobs completed).
+ */
+ uint64_t finished_seqno;
+
+ /* List of all struct vc4_exec_info for jobs to be executed in
+ * the binner. The first job in the list is the one currently
+ * programmed into ct0ca for execution.
+ */
+ struct list_head bin_job_list;
+
+ /* List of all struct vc4_exec_info for jobs that have
+ * completed binning and are ready for rendering. The first
+ * job in the list is the one currently programmed into ct1ca
+ * for execution.
+ */
+ struct list_head render_job_list;
+
+ /* List of the finished vc4_exec_infos waiting to be freed by
+ * job_done_work.
+ */
+ struct list_head job_done_list;
+ /* Spinlock used to synchronize the job_list and seqno
+ * accesses between the IRQ handler and GEM ioctls.
+ */
+ spinlock_t job_lock;
+ wait_queue_head_t job_wait_queue;
+ struct work_struct job_done_work;
+
+ /* Used to track the active perfmon if any. Access to this field is
+ * protected by job_lock.
+ */
+ struct vc4_perfmon *active_perfmon;
+
+ /* List of struct vc4_seqno_cb for callbacks to be made from a
+ * workqueue when the given seqno is passed.
+ */
+ struct list_head seqno_cb_list;
+
+ /* The memory used for storing binner tile alloc, tile state,
+ * and overflow memory allocations. This is freed when V3D
+ * powers down.
+ */
+ struct vc4_bo *bin_bo;
+
+ /* Size of blocks allocated within bin_bo. */
+ uint32_t bin_alloc_size;
+
+ /* Bitmask of the bin_alloc_size chunks in bin_bo that are
+ * used.
+ */
+ uint32_t bin_alloc_used;
+
+ /* Bitmask of the current bin_alloc used for overflow memory. */
+ uint32_t bin_alloc_overflow;
+
+ struct work_struct overflow_mem_work;
+
+ int power_refcount;
+
+ /* Mutex controlling the power refcount. */
+ struct mutex power_lock;
+
+ struct {
+ struct timer_list timer;
+ struct work_struct reset_work;
+ } hangcheck;
+
+ struct semaphore async_modeset;
+
+ struct drm_modeset_lock ctm_state_lock;
+ struct drm_private_obj ctm_manager;
+};
+
+static inline struct vc4_dev *
+to_vc4_dev(struct drm_device *dev)
+{
+ return (struct vc4_dev *)dev->dev_private;
+}
+
+struct vc4_bo {
+ struct drm_gem_cma_object base;
+
+ /* seqno of the last job to render using this BO. */
+ uint64_t seqno;
+
+ /* seqno of the last job to use the RCL to write to this BO.
+ *
+ * Note that this doesn't include binner overflow memory
+ * writes.
+ */
+ uint64_t write_seqno;
+
+ bool t_format;
+
+ /* List entry for the BO's position in either
+ * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
+ */
+ struct list_head unref_head;
+
+ /* Time in jiffies when the BO was put in vc4->bo_cache. */
+ unsigned long free_time;
+
+ /* List entry for the BO's position in vc4_dev->bo_cache.size_list */
+ struct list_head size_head;
+
+ /* Struct for shader validation state, if created by
+ * DRM_IOCTL_VC4_CREATE_SHADER_BO.
+ */
+ struct vc4_validated_shader_info *validated_shader;
+
+ /* normally (resv == &_resv) except for imported bo's */
+ struct reservation_object *resv;
+ struct reservation_object _resv;
+
+ /* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
+ * for user-allocated labels.
+ */
+ int label;
+
+ /* Count the number of active users. This is needed to determine
+ * whether we can move the BO to the purgeable list or not (when the BO
+ * is used by the GPU or the display engine we can't purge it).
+ */
+ refcount_t usecnt;
+
+ /* Store purgeable/purged state here */
+ u32 madv;
+ struct mutex madv_lock;
+};
+
+static inline struct vc4_bo *
+to_vc4_bo(struct drm_gem_object *bo)
+{
+ return (struct vc4_bo *)bo;
+}
+
+struct vc4_fence {
+ struct dma_fence base;
+ struct drm_device *dev;
+ /* vc4 seqno for signaled() test */
+ uint64_t seqno;
+};
+
+static inline struct vc4_fence *
+to_vc4_fence(struct dma_fence *fence)
+{
+ return (struct vc4_fence *)fence;
+}
+
+struct vc4_seqno_cb {
+ struct work_struct work;
+ uint64_t seqno;
+ void (*func)(struct vc4_seqno_cb *cb);
+};
+
+struct vc4_v3d {
+ struct vc4_dev *vc4;
+ struct platform_device *pdev;
+ void __iomem *regs;
+ struct clk *clk;
+};
+
+struct vc4_hvs {
+ struct platform_device *pdev;
+ void __iomem *regs;
+ u32 __iomem *dlist;
+
+ /* Memory manager for CRTCs to allocate space in the display
+ * list. Units are dwords.
+ */
+ struct drm_mm dlist_mm;
+ /* Memory manager for the LBM memory used by HVS scaling. */
+ struct drm_mm lbm_mm;
+ spinlock_t mm_lock;
+
+ struct drm_mm_node mitchell_netravali_filter;
+};
+
+struct vc4_plane {
+ struct drm_plane base;
+};
+
+static inline struct vc4_plane *
+to_vc4_plane(struct drm_plane *plane)
+{
+ return (struct vc4_plane *)plane;
+}
+
+enum vc4_scaling_mode {
+ VC4_SCALING_NONE,
+ VC4_SCALING_TPZ,
+ VC4_SCALING_PPF,
+};
+
+struct vc4_plane_state {
+ struct drm_plane_state base;
+ /* System memory copy of the display list for this element, computed
+ * at atomic_check time.
+ */
+ u32 *dlist;
+ u32 dlist_size; /* Number of dwords allocated for the display list */
+ u32 dlist_count; /* Number of used dwords in the display list. */
+
+ /* Offset in the dlist to various words, for pageflip or
+ * cursor updates.
+ */
+ u32 pos0_offset;
+ u32 pos2_offset;
+ u32 ptr0_offset;
+
+ /* Offset where the plane's dlist was last stored in the
+ * hardware at vc4_crtc_atomic_flush() time.
+ */
+ u32 __iomem *hw_dlist;
+
+ /* Clipped coordinates of the plane on the display. */
+ int crtc_x, crtc_y, crtc_w, crtc_h;
+ /* Clipped area being scanned from in the FB. */
+ u32 src_x, src_y;
+
+ u32 src_w[2], src_h[2];
+
+ /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
+ enum vc4_scaling_mode x_scaling[2], y_scaling[2];
+ bool is_unity;
+ bool is_yuv;
+
+ /* Offset to start scanning out from the start of the plane's
+ * BO.
+ */
+ u32 offsets[3];
+
+ /* Our allocation in LBM for temporary storage during scaling. */
+ struct drm_mm_node lbm;
+
+ /* Set when the plane has per-pixel alpha content or does not cover
+ * the entire screen. This is a hint to the CRTC that it might need
+ * to enable background color fill.
+ */
+ bool needs_bg_fill;
+};
+
+static inline struct vc4_plane_state *
+to_vc4_plane_state(struct drm_plane_state *state)
+{
+ return (struct vc4_plane_state *)state;
+}
+
+enum vc4_encoder_type {
+ VC4_ENCODER_TYPE_NONE,
+ VC4_ENCODER_TYPE_HDMI,
+ VC4_ENCODER_TYPE_VEC,
+ VC4_ENCODER_TYPE_DSI0,
+ VC4_ENCODER_TYPE_DSI1,
+ VC4_ENCODER_TYPE_SMI,
+ VC4_ENCODER_TYPE_DPI,
+};
+
+struct vc4_encoder {
+ struct drm_encoder base;
+ enum vc4_encoder_type type;
+ u32 clock_select;
+};
+
+static inline struct vc4_encoder *
+to_vc4_encoder(struct drm_encoder *encoder)
+{
+ return container_of(encoder, struct vc4_encoder, base);
+}
+
+struct vc4_crtc_data {
+ /* Which channel of the HVS this pixelvalve sources from. */
+ int hvs_channel;
+
+ enum vc4_encoder_type encoder_types[4];
+};
+
+struct vc4_crtc {
+ struct drm_crtc base;
+ const struct vc4_crtc_data *data;
+ void __iomem *regs;
+
+ /* Timestamp at start of vblank irq - unaffected by lock delays. */
+ ktime_t t_vblank;
+
+ /* Which HVS channel we're using for our CRTC. */
+ int channel;
+
+ u8 lut_r[256];
+ u8 lut_g[256];
+ u8 lut_b[256];
+ /* Size in pixels of the COB memory allocated to this CRTC. */
+ u32 cob_size;
+
+ struct drm_pending_vblank_event *event;
+};
+
+static inline struct vc4_crtc *
+to_vc4_crtc(struct drm_crtc *crtc)
+{
+ return (struct vc4_crtc *)crtc;
+}
+
+#define V3D_READ(offset) readl(vc4->v3d->regs + offset)
+#define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset)
+#define HVS_READ(offset) readl(vc4->hvs->regs + offset)
+#define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
+
+struct vc4_exec_info {
+ /* Sequence number for this bin/render job. */
+ uint64_t seqno;
+
+ /* Latest write_seqno of any BO that binning depends on. */
+ uint64_t bin_dep_seqno;
+
+ struct dma_fence *fence;
+
+ /* Last current addresses the hardware was processing when the
+ * hangcheck timer checked on us.
+ */
+ uint32_t last_ct0ca, last_ct1ca;
+
+ /* Kernel-space copy of the ioctl arguments */
+ struct drm_vc4_submit_cl *args;
+
+ /* This is the array of BOs that were looked up at the start of exec.
+ * Command validation will use indices into this array.
+ */
+ struct drm_gem_cma_object **bo;
+ uint32_t bo_count;
+
+ /* List of BOs that are being written by the RCL. Other than
+ * the binner temporary storage, this is all the BOs written
+ * by the job.
+ */
+ struct drm_gem_cma_object *rcl_write_bo[4];
+ uint32_t rcl_write_bo_count;
+
+ /* Pointers for our position in vc4->job_list */
+ struct list_head head;
+
+ /* List of other BOs used in the job that need to be released
+ * once the job is complete.
+ */
+ struct list_head unref_list;
+
+ /* Current unvalidated indices into @bo loaded by the non-hardware
+ * VC4_PACKET_GEM_HANDLES.
+ */
+ uint32_t bo_index[2];
+
+ /* This is the BO where we store the validated command lists, shader
+ * records, and uniforms.
+ */
+ struct drm_gem_cma_object *exec_bo;
+
+ /**
+ * This tracks the per-shader-record state (packet 64) that
+ * determines the length of the shader record and the offset
+ * it's expected to be found at. It gets read in from the
+ * command lists.
+ */
+ struct vc4_shader_state {
+ uint32_t addr;
+ /* Maximum vertex index referenced by any primitive using this
+ * shader state.
+ */
+ uint32_t max_index;
+ } *shader_state;
+
+ /** How many shader states the user declared they were using. */
+ uint32_t shader_state_size;
+ /** How many shader state records the validator has seen. */
+ uint32_t shader_state_count;
+
+ bool found_tile_binning_mode_config_packet;
+ bool found_start_tile_binning_packet;
+ bool found_increment_semaphore_packet;
+ bool found_flush;
+ uint8_t bin_tiles_x, bin_tiles_y;
+ /* Physical address of the start of the tile alloc array
+ * (where each tile's binned CL will start)
+ */
+ uint32_t tile_alloc_offset;
+ /* Bitmask of which binner slots are freed when this job completes. */
+ uint32_t bin_slots;
+
+ /**
+ * Computed addresses pointing into exec_bo where we start the
+ * bin thread (ct0) and render thread (ct1).
+ */
+ uint32_t ct0ca, ct0ea;
+ uint32_t ct1ca, ct1ea;
+
+ /* Pointer to the unvalidated bin CL (if present). */
+ void *bin_u;
+
+ /* Pointers to the shader recs. The paddr gets incremented as CL
+ * packets are relocated in validate_gl_shader_state, and the vaddrs
+ * (u and v) get incremented and size decremented as the shader recs
+ * themselves are validated.
+ */
+ void *shader_rec_u;
+ void *shader_rec_v;
+ uint32_t shader_rec_p;
+ uint32_t shader_rec_size;
+
+ /* Pointers to the uniform data. These pointers are incremented, and
+ * size decremented, as each batch of uniforms is uploaded.
+ */
+ void *uniforms_u;
+ void *uniforms_v;
+ uint32_t uniforms_p;
+ uint32_t uniforms_size;
+
+ /* Pointer to a performance monitor object if the user requested it,
+ * NULL otherwise.
+ */
+ struct vc4_perfmon *perfmon;
+};
+
+/* Per-open file private data. Any driver-specific resource that has to be
+ * released when the DRM file is closed should be placed here.
+ */
+struct vc4_file {
+ struct {
+ struct idr idr;
+ struct mutex lock;
+ } perfmon;
+};
+
+static inline struct vc4_exec_info *
+vc4_first_bin_job(struct vc4_dev *vc4)
+{
+ return list_first_entry_or_null(&vc4->bin_job_list,
+ struct vc4_exec_info, head);
+}
+
+static inline struct vc4_exec_info *
+vc4_first_render_job(struct vc4_dev *vc4)
+{
+ return list_first_entry_or_null(&vc4->render_job_list,
+ struct vc4_exec_info, head);
+}
+
+static inline struct vc4_exec_info *
+vc4_last_render_job(struct vc4_dev *vc4)
+{
+ if (list_empty(&vc4->render_job_list))
+ return NULL;
+ return list_last_entry(&vc4->render_job_list,
+ struct vc4_exec_info, head);
+}
+
+/**
+ * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
+ * setup parameters.
+ *
+ * This will be used at draw time to relocate the reference to the texture
+ * contents in p0, and validate that the offset combined with
+ * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
+ * Note that the hardware treats unprovided config parameters as 0, so not all
+ * of them need to be set up for every texture sample, and we'll store ~0 as
+ * the offset to mark the unused ones.
+ *
+ * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
+ * Setup") for definitions of the texture parameters.
+ */
+struct vc4_texture_sample_info {
+ bool is_direct;
+ uint32_t p_offset[4];
+};
+
+/**
+ * struct vc4_validated_shader_info - information about validated shaders that
+ * needs to be used from command list validation.
+ *
+ * For a given shader, each time a shader state record references it, we need
+ * to verify that the shader doesn't read more uniforms than the shader state
+ * record's uniform BO pointer can provide, and we need to apply relocations
+ * and validate the shader state record's uniforms that define the texture
+ * samples.
+ */
+struct vc4_validated_shader_info {
+ uint32_t uniforms_size;
+ uint32_t uniforms_src_size;
+ uint32_t num_texture_samples;
+ struct vc4_texture_sample_info *texture_samples;
+
+ uint32_t num_uniform_addr_offsets;
+ uint32_t *uniform_addr_offsets;
+
+ bool is_threaded;
+};
+
+/**
+ * _wait_for - magic (register) wait macro
+ *
+ * Does the right thing for modeset paths when run under kgdb or similar atomic
+ * contexts. Note that it's important that we check the condition again after
+ * having timed out, since the timeout could be due to preemption or similar and
+ * we've never had a chance to check the condition before the timeout.
+ */
+#define _wait_for(COND, MS, W) ({ \
+ unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \
+ int ret__ = 0; \
+ while (!(COND)) { \
+ if (time_after(jiffies, timeout__)) { \
+ if (!(COND)) \
+ ret__ = -ETIMEDOUT; \
+ break; \
+ } \
+ if (W && drm_can_sleep()) { \
+ msleep(W); \
+ } else { \
+ cpu_relax(); \
+ } \
+ } \
+ ret__; \
+})
+
+#define wait_for(COND, MS) _wait_for(COND, MS, 1)
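
A small usage sketch for the wait_for() helper defined above, polling a register through the DPI accessors from earlier in this patch (purely illustrative; the DPI code does not actually need such a poll):

/* Illustrative only: poll until the DPI block reports its ID, giving up
 * after 10 ms. wait_for() re-checks the condition after the timeout, so a
 * preemption-induced delay does not yield a spurious -ETIMEDOUT.
 */
int ret = wait_for(DPI_READ(DPI_ID) == DPI_ID_VALUE, 10);
if (ret == -ETIMEDOUT)
	DRM_ERROR("DPI block never came up\n");
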
+
+/* vc4_bo.c */
+struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
+void vc4_free_object(struct drm_gem_object *gem_obj);
+struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
+ bool from_cache, enum vc4_kernel_bo_type type);
+int vc4_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+struct dma_buf *vc4_prime_export(struct drm_device *dev,
+ struct drm_gem_object *obj, int flags);
+int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+vm_fault_t vc4_fault(struct vm_fault *vmf);
+int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
+struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
+int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt);
+void *vc4_prime_vmap(struct drm_gem_object *obj);
+int vc4_bo_cache_init(struct drm_device *dev);
+void vc4_bo_cache_destroy(struct drm_device *dev);
+int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
+int vc4_bo_inc_usecnt(struct vc4_bo *bo);
+void vc4_bo_dec_usecnt(struct vc4_bo *bo);
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
+void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
+
+/* vc4_crtc.c */
+extern struct platform_driver vc4_crtc_driver;
+int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
+bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
+ bool in_vblank_irq, int *vpos, int *hpos,
+ ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode);
+void vc4_crtc_handle_vblank(struct vc4_crtc *crtc);
+void vc4_crtc_txp_armed(struct drm_crtc_state *state);
+
+/* vc4_debugfs.c */
+int vc4_debugfs_init(struct drm_minor *minor);
+
+/* vc4_drv.c */
+void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
+
+/* vc4_dpi.c */
+extern struct platform_driver vc4_dpi_driver;
+int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused);
+
+/* vc4_dsi.c */
+extern struct platform_driver vc4_dsi_driver;
+int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused);
+
+/* vc4_fence.c */
+extern const struct dma_fence_ops vc4_fence_ops;
+
+/* vc4_gem.c */
+void vc4_gem_init(struct drm_device *dev);
+void vc4_gem_destroy(struct drm_device *dev);
+int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+void vc4_submit_next_bin_job(struct drm_device *dev);
+void vc4_submit_next_render_job(struct drm_device *dev);
+void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
+int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
+ uint64_t timeout_ns, bool interruptible);
+void vc4_job_handle_completed(struct vc4_dev *vc4);
+int vc4_queue_seqno_cb(struct drm_device *dev,
+ struct vc4_seqno_cb *cb, uint64_t seqno,
+ void (*func)(struct vc4_seqno_cb *cb));
+int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+
+/* vc4_hdmi.c */
+extern struct platform_driver vc4_hdmi_driver;
+int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
+
+/* vc4_vec.c */
+extern struct platform_driver vc4_vec_driver;
+int vc4_vec_debugfs_regs(struct seq_file *m, void *unused);
+
+/* vc4_txp.c */
+extern struct platform_driver vc4_txp_driver;
+int vc4_txp_debugfs_regs(struct seq_file *m, void *unused);
+
+/* vc4_irq.c */
+irqreturn_t vc4_irq(int irq, void *arg);
+void vc4_irq_preinstall(struct drm_device *dev);
+int vc4_irq_postinstall(struct drm_device *dev);
+void vc4_irq_uninstall(struct drm_device *dev);
+void vc4_irq_reset(struct drm_device *dev);
+
+/* vc4_hvs.c */
+extern struct platform_driver vc4_hvs_driver;
+void vc4_hvs_dump_state(struct drm_device *dev);
+int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused);
+
+/* vc4_kms.c */
+int vc4_kms_load(struct drm_device *dev);
+
+/* vc4_plane.c */
+struct drm_plane *vc4_plane_init(struct drm_device *dev,
+ enum drm_plane_type type);
+u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
+u32 vc4_plane_dlist_size(const struct drm_plane_state *state);
+void vc4_plane_async_set_fb(struct drm_plane *plane,
+ struct drm_framebuffer *fb);
+
+/* vc4_v3d.c */
+extern struct platform_driver vc4_v3d_driver;
+int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
+int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
+int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
+
+/* vc4_validate.c */
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+ void *validated,
+ void *unvalidated,
+ struct vc4_exec_info *exec);
+
+int
+vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
+
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+ uint32_t hindex);
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
+
+bool vc4_check_tex_size(struct vc4_exec_info *exec,
+ struct drm_gem_cma_object *fbo,
+ uint32_t offset, uint8_t tiling_format,
+ uint32_t width, uint32_t height, uint8_t cpp);
+
+/* vc4_validate_shaders.c */
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
+
+/* vc4_perfmon.c */
+void vc4_perfmon_get(struct vc4_perfmon *perfmon);
+void vc4_perfmon_put(struct vc4_perfmon *perfmon);
+void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
+void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
+ bool capture);
+struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
+void vc4_perfmon_open_file(struct vc4_file *vc4file);
+void vc4_perfmon_close_file(struct vc4_file *vc4file);
+int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
new file mode 100644
index 000000000..0c607eb33
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -0,0 +1,1744 @@
+/*
+ * Copyright (C) 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * DOC: VC4 DSI0/DSI1 module
+ *
+ * BCM2835 contains two DSI modules, DSI0 and DSI1. DSI0 is a
+ * single-lane DSI controller, while DSI1 is a more modern 4-lane DSI
+ * controller.
+ *
+ * Most Raspberry Pi boards expose DSI1 as their "DISPLAY" connector,
+ * while the compute module brings both DSI0 and DSI1 out.
+ *
+ * Currently this driver has only been tested with a video-mode
+ * display on DSI1; most of the information needed to support DSI0
+ * is hopefully present.
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/completion.h>
+#include <linux/component.h>
+#include <linux/dmaengine.h>
+#include <linux/i2c.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define DSI_CMD_FIFO_DEPTH 16
+#define DSI_PIX_FIFO_DEPTH 256
+#define DSI_PIX_FIFO_WIDTH 4
+
+#define DSI0_CTRL 0x00
+
+/* Command packet control. */
+#define DSI0_TXPKT1C 0x04 /* AKA PKTC */
+#define DSI1_TXPKT1C 0x04
+# define DSI_TXPKT1C_TRIG_CMD_MASK VC4_MASK(31, 24)
+# define DSI_TXPKT1C_TRIG_CMD_SHIFT 24
+# define DSI_TXPKT1C_CMD_REPEAT_MASK VC4_MASK(23, 10)
+# define DSI_TXPKT1C_CMD_REPEAT_SHIFT 10
+
+# define DSI_TXPKT1C_DISPLAY_NO_MASK VC4_MASK(9, 8)
+# define DSI_TXPKT1C_DISPLAY_NO_SHIFT 8
+/* Short, trigger, BTA, or a long packet that fits all in CMDFIFO. */
+# define DSI_TXPKT1C_DISPLAY_NO_SHORT 0
+/* Primary display where cmdfifo provides part of the payload and
+ * pixelvalve the rest.
+ */
+# define DSI_TXPKT1C_DISPLAY_NO_PRIMARY 1
+/* Secondary display where cmdfifo provides part of the payload and
+ * pixfifo the rest.
+ */
+# define DSI_TXPKT1C_DISPLAY_NO_SECONDARY 2
+
+# define DSI_TXPKT1C_CMD_TX_TIME_MASK VC4_MASK(7, 6)
+# define DSI_TXPKT1C_CMD_TX_TIME_SHIFT 6
+
+# define DSI_TXPKT1C_CMD_CTRL_MASK VC4_MASK(5, 4)
+# define DSI_TXPKT1C_CMD_CTRL_SHIFT 4
+/* Command only. Uses TXPKT1H and DISPLAY_NO */
+# define DSI_TXPKT1C_CMD_CTRL_TX 0
+/* Command with BTA for either ack or read data. */
+# define DSI_TXPKT1C_CMD_CTRL_RX 1
+/* Trigger according to TRIG_CMD */
+# define DSI_TXPKT1C_CMD_CTRL_TRIG 2
+/* BTA alone for getting error status after a command, or a TE trigger
+ * without a previous command.
+ */
+# define DSI_TXPKT1C_CMD_CTRL_BTA 3
+
+# define DSI_TXPKT1C_CMD_MODE_LP BIT(3)
+# define DSI_TXPKT1C_CMD_TYPE_LONG BIT(2)
+# define DSI_TXPKT1C_CMD_TE_EN BIT(1)
+# define DSI_TXPKT1C_CMD_EN BIT(0)
+
+/* Command packet header. */
+#define DSI0_TXPKT1H 0x08 /* AKA PKTH */
+#define DSI1_TXPKT1H 0x08
+# define DSI_TXPKT1H_BC_CMDFIFO_MASK VC4_MASK(31, 24)
+# define DSI_TXPKT1H_BC_CMDFIFO_SHIFT 24
+# define DSI_TXPKT1H_BC_PARAM_MASK VC4_MASK(23, 8)
+# define DSI_TXPKT1H_BC_PARAM_SHIFT 8
+# define DSI_TXPKT1H_BC_DT_MASK VC4_MASK(7, 0)
+# define DSI_TXPKT1H_BC_DT_SHIFT 0
+
+#define DSI0_RXPKT1H 0x0c /* AKA RX1_PKTH */
+#define DSI1_RXPKT1H 0x14
+# define DSI_RXPKT1H_CRC_ERR BIT(31)
+# define DSI_RXPKT1H_DET_ERR BIT(30)
+# define DSI_RXPKT1H_ECC_ERR BIT(29)
+# define DSI_RXPKT1H_COR_ERR BIT(28)
+# define DSI_RXPKT1H_INCOMP_PKT BIT(25)
+# define DSI_RXPKT1H_PKT_TYPE_LONG BIT(24)
+/* Byte count if DSI_RXPKT1H_PKT_TYPE_LONG */
+# define DSI_RXPKT1H_BC_PARAM_MASK VC4_MASK(23, 8)
+# define DSI_RXPKT1H_BC_PARAM_SHIFT 8
+/* Short return bytes if !DSI_RXPKT1H_PKT_TYPE_LONG */
+# define DSI_RXPKT1H_SHORT_1_MASK VC4_MASK(23, 16)
+# define DSI_RXPKT1H_SHORT_1_SHIFT 16
+# define DSI_RXPKT1H_SHORT_0_MASK VC4_MASK(15, 8)
+# define DSI_RXPKT1H_SHORT_0_SHIFT 8
+# define DSI_RXPKT1H_DT_LP_CMD_MASK VC4_MASK(7, 0)
+# define DSI_RXPKT1H_DT_LP_CMD_SHIFT 0
+
+#define DSI0_RXPKT2H 0x10 /* AKA RX2_PKTH */
+#define DSI1_RXPKT2H 0x18
+# define DSI_RXPKT1H_DET_ERR BIT(30)
+# define DSI_RXPKT1H_ECC_ERR BIT(29)
+# define DSI_RXPKT1H_COR_ERR BIT(28)
+# define DSI_RXPKT1H_INCOMP_PKT BIT(25)
+# define DSI_RXPKT1H_BC_PARAM_MASK VC4_MASK(23, 8)
+# define DSI_RXPKT1H_BC_PARAM_SHIFT 8
+# define DSI_RXPKT1H_DT_MASK VC4_MASK(7, 0)
+# define DSI_RXPKT1H_DT_SHIFT 0
+
+#define DSI0_TXPKT_CMD_FIFO 0x14 /* AKA CMD_DATAF */
+#define DSI1_TXPKT_CMD_FIFO 0x1c
+
+#define DSI0_DISP0_CTRL 0x18
+# define DSI_DISP0_PIX_CLK_DIV_MASK VC4_MASK(21, 13)
+# define DSI_DISP0_PIX_CLK_DIV_SHIFT 13
+# define DSI_DISP0_LP_STOP_CTRL_MASK VC4_MASK(12, 11)
+# define DSI_DISP0_LP_STOP_CTRL_SHIFT 11
+# define DSI_DISP0_LP_STOP_DISABLE 0
+# define DSI_DISP0_LP_STOP_PERLINE 1
+# define DSI_DISP0_LP_STOP_PERFRAME 2
+
+/* Transmit RGB pixels and null packets only during HACTIVE, instead
+ * of going to LP-STOP.
+ */
+# define DSI_DISP_HACTIVE_NULL BIT(10)
+/* Transmit blanking packet only during vblank, instead of allowing LP-STOP. */
+# define DSI_DISP_VBLP_CTRL BIT(9)
+/* Transmit blanking packet only during HFP, instead of allowing LP-STOP. */
+# define DSI_DISP_HFP_CTRL BIT(8)
+/* Transmit blanking packet only during HBP, instead of allowing LP-STOP. */
+# define DSI_DISP_HBP_CTRL BIT(7)
+# define DSI_DISP0_CHANNEL_MASK VC4_MASK(6, 5)
+# define DSI_DISP0_CHANNEL_SHIFT 5
+/* Enables end events for HSYNC/VSYNC, not just start events. */
+# define DSI_DISP0_ST_END BIT(4)
+# define DSI_DISP0_PFORMAT_MASK VC4_MASK(3, 2)
+# define DSI_DISP0_PFORMAT_SHIFT 2
+# define DSI_PFORMAT_RGB565 0
+# define DSI_PFORMAT_RGB666_PACKED 1
+# define DSI_PFORMAT_RGB666 2
+# define DSI_PFORMAT_RGB888 3
+/* Default is VIDEO mode. */
+# define DSI_DISP0_COMMAND_MODE BIT(1)
+# define DSI_DISP0_ENABLE BIT(0)
+
+#define DSI0_DISP1_CTRL 0x1c
+#define DSI1_DISP1_CTRL 0x2c
+/* Format of the data written to TXPKT_PIX_FIFO. */
+# define DSI_DISP1_PFORMAT_MASK VC4_MASK(2, 1)
+# define DSI_DISP1_PFORMAT_SHIFT 1
+# define DSI_DISP1_PFORMAT_16BIT 0
+# define DSI_DISP1_PFORMAT_24BIT 1
+# define DSI_DISP1_PFORMAT_32BIT_LE 2
+# define DSI_DISP1_PFORMAT_32BIT_BE 3
+
+/* DISP1 is always command mode. */
+# define DSI_DISP1_ENABLE BIT(0)
+
+#define DSI0_TXPKT_PIX_FIFO 0x20 /* AKA PIX_FIFO */
+
+#define DSI0_INT_STAT 0x24
+#define DSI0_INT_EN 0x28
+# define DSI1_INT_PHY_D3_ULPS BIT(30)
+# define DSI1_INT_PHY_D3_STOP BIT(29)
+# define DSI1_INT_PHY_D2_ULPS BIT(28)
+# define DSI1_INT_PHY_D2_STOP BIT(27)
+# define DSI1_INT_PHY_D1_ULPS BIT(26)
+# define DSI1_INT_PHY_D1_STOP BIT(25)
+# define DSI1_INT_PHY_D0_ULPS BIT(24)
+# define DSI1_INT_PHY_D0_STOP BIT(23)
+# define DSI1_INT_FIFO_ERR BIT(22)
+# define DSI1_INT_PHY_DIR_RTF BIT(21)
+# define DSI1_INT_PHY_RXLPDT BIT(20)
+# define DSI1_INT_PHY_RXTRIG BIT(19)
+# define DSI1_INT_PHY_D0_LPDT BIT(18)
+# define DSI1_INT_PHY_DIR_FTR BIT(17)
+
+/* Signaled when the clock lane enters the given state. */
+# define DSI1_INT_PHY_CLOCK_ULPS BIT(16)
+# define DSI1_INT_PHY_CLOCK_HS BIT(15)
+# define DSI1_INT_PHY_CLOCK_STOP BIT(14)
+
+/* Signaled on timeouts */
+# define DSI1_INT_PR_TO BIT(13)
+# define DSI1_INT_TA_TO BIT(12)
+# define DSI1_INT_LPRX_TO BIT(11)
+# define DSI1_INT_HSTX_TO BIT(10)
+
+/* Contention on a line when trying to drive the line low */
+# define DSI1_INT_ERR_CONT_LP1 BIT(9)
+# define DSI1_INT_ERR_CONT_LP0 BIT(8)
+
+/* Control error: incorrect line state sequence on data lane 0. */
+# define DSI1_INT_ERR_CONTROL BIT(7)
+/* LPDT synchronization error (bits received not a multiple of 8). */
+# define DSI1_INT_ERR_SYNC_ESC BIT(6)
+/* Signaled after receiving an error packet from the display in
+ * response to a read.
+ */
+# define DSI1_INT_RXPKT2 BIT(5)
+/* Signaled after receiving a packet. The header and optional short
+ * response will be in RXPKT1H, and a long response will be in the
+ * RXPKT_FIFO.
+ */
+# define DSI1_INT_RXPKT1 BIT(4)
+# define DSI1_INT_TXPKT2_DONE BIT(3)
+# define DSI1_INT_TXPKT2_END BIT(2)
+/* Signaled after all repeats of TXPKT1 are transferred. */
+# define DSI1_INT_TXPKT1_DONE BIT(1)
+/* Signaled after each TXPKT1 repeat is scheduled. */
+# define DSI1_INT_TXPKT1_END BIT(0)
+
+#define DSI1_INTERRUPTS_ALWAYS_ENABLED (DSI1_INT_ERR_SYNC_ESC | \
+ DSI1_INT_ERR_CONTROL | \
+ DSI1_INT_ERR_CONT_LP0 | \
+ DSI1_INT_ERR_CONT_LP1 | \
+ DSI1_INT_HSTX_TO | \
+ DSI1_INT_LPRX_TO | \
+ DSI1_INT_TA_TO | \
+ DSI1_INT_PR_TO)
+
+#define DSI0_STAT 0x2c
+#define DSI0_HSTX_TO_CNT 0x30
+#define DSI0_LPRX_TO_CNT 0x34
+#define DSI0_TA_TO_CNT 0x38
+#define DSI0_PR_TO_CNT 0x3c
+#define DSI0_PHYC 0x40
+# define DSI1_PHYC_ESC_CLK_LPDT_MASK VC4_MASK(25, 20)
+# define DSI1_PHYC_ESC_CLK_LPDT_SHIFT 20
+# define DSI1_PHYC_HS_CLK_CONTINUOUS BIT(18)
+# define DSI0_PHYC_ESC_CLK_LPDT_MASK VC4_MASK(17, 12)
+# define DSI0_PHYC_ESC_CLK_LPDT_SHIFT 12
+# define DSI1_PHYC_CLANE_ULPS BIT(17)
+# define DSI1_PHYC_CLANE_ENABLE BIT(16)
+# define DSI_PHYC_DLANE3_ULPS BIT(13)
+# define DSI_PHYC_DLANE3_ENABLE BIT(12)
+# define DSI0_PHYC_HS_CLK_CONTINUOUS BIT(10)
+# define DSI0_PHYC_CLANE_ULPS BIT(9)
+# define DSI_PHYC_DLANE2_ULPS BIT(9)
+# define DSI0_PHYC_CLANE_ENABLE BIT(8)
+# define DSI_PHYC_DLANE2_ENABLE BIT(8)
+# define DSI_PHYC_DLANE1_ULPS BIT(5)
+# define DSI_PHYC_DLANE1_ENABLE BIT(4)
+# define DSI_PHYC_DLANE0_FORCE_STOP BIT(2)
+# define DSI_PHYC_DLANE0_ULPS BIT(1)
+# define DSI_PHYC_DLANE0_ENABLE BIT(0)
+
+#define DSI0_HS_CLT0 0x44
+#define DSI0_HS_CLT1 0x48
+#define DSI0_HS_CLT2 0x4c
+#define DSI0_HS_DLT3 0x50
+#define DSI0_HS_DLT4 0x54
+#define DSI0_HS_DLT5 0x58
+#define DSI0_HS_DLT6 0x5c
+#define DSI0_HS_DLT7 0x60
+
+#define DSI0_PHY_AFEC0 0x64
+# define DSI0_PHY_AFEC0_DDR2CLK_EN BIT(26)
+# define DSI0_PHY_AFEC0_DDRCLK_EN BIT(25)
+# define DSI0_PHY_AFEC0_LATCH_ULPS BIT(24)
+# define DSI1_PHY_AFEC0_IDR_DLANE3_MASK VC4_MASK(31, 29)
+# define DSI1_PHY_AFEC0_IDR_DLANE3_SHIFT 29
+# define DSI1_PHY_AFEC0_IDR_DLANE2_MASK VC4_MASK(28, 26)
+# define DSI1_PHY_AFEC0_IDR_DLANE2_SHIFT 26
+# define DSI1_PHY_AFEC0_IDR_DLANE1_MASK VC4_MASK(25, 23)
+# define DSI1_PHY_AFEC0_IDR_DLANE1_SHIFT 23
+# define DSI1_PHY_AFEC0_IDR_DLANE0_MASK VC4_MASK(22, 20)
+# define DSI1_PHY_AFEC0_IDR_DLANE0_SHIFT 20
+# define DSI1_PHY_AFEC0_IDR_CLANE_MASK VC4_MASK(19, 17)
+# define DSI1_PHY_AFEC0_IDR_CLANE_SHIFT 17
+# define DSI0_PHY_AFEC0_ACTRL_DLANE1_MASK VC4_MASK(23, 20)
+# define DSI0_PHY_AFEC0_ACTRL_DLANE1_SHIFT 20
+# define DSI0_PHY_AFEC0_ACTRL_DLANE0_MASK VC4_MASK(19, 16)
+# define DSI0_PHY_AFEC0_ACTRL_DLANE0_SHIFT 16
+# define DSI0_PHY_AFEC0_ACTRL_CLANE_MASK VC4_MASK(15, 12)
+# define DSI0_PHY_AFEC0_ACTRL_CLANE_SHIFT 12
+# define DSI1_PHY_AFEC0_DDR2CLK_EN BIT(16)
+# define DSI1_PHY_AFEC0_DDRCLK_EN BIT(15)
+# define DSI1_PHY_AFEC0_LATCH_ULPS BIT(14)
+# define DSI1_PHY_AFEC0_RESET BIT(13)
+# define DSI1_PHY_AFEC0_PD BIT(12)
+# define DSI0_PHY_AFEC0_RESET BIT(11)
+# define DSI1_PHY_AFEC0_PD_BG BIT(11)
+# define DSI0_PHY_AFEC0_PD BIT(10)
+# define DSI1_PHY_AFEC0_PD_DLANE3 BIT(10)
+# define DSI0_PHY_AFEC0_PD_BG BIT(9)
+# define DSI1_PHY_AFEC0_PD_DLANE2 BIT(9)
+# define DSI0_PHY_AFEC0_PD_DLANE1 BIT(8)
+# define DSI1_PHY_AFEC0_PD_DLANE1 BIT(8)
+# define DSI_PHY_AFEC0_PTATADJ_MASK VC4_MASK(7, 4)
+# define DSI_PHY_AFEC0_PTATADJ_SHIFT 4
+# define DSI_PHY_AFEC0_CTATADJ_MASK VC4_MASK(3, 0)
+# define DSI_PHY_AFEC0_CTATADJ_SHIFT 0
+
+#define DSI0_PHY_AFEC1 0x68
+# define DSI0_PHY_AFEC1_IDR_DLANE1_MASK VC4_MASK(10, 8)
+# define DSI0_PHY_AFEC1_IDR_DLANE1_SHIFT 8
+# define DSI0_PHY_AFEC1_IDR_DLANE0_MASK VC4_MASK(6, 4)
+# define DSI0_PHY_AFEC1_IDR_DLANE0_SHIFT 4
+# define DSI0_PHY_AFEC1_IDR_CLANE_MASK VC4_MASK(2, 0)
+# define DSI0_PHY_AFEC1_IDR_CLANE_SHIFT 0
+
+#define DSI0_TST_SEL 0x6c
+#define DSI0_TST_MON 0x70
+#define DSI0_ID 0x74
+# define DSI_ID_VALUE 0x00647369
+
+#define DSI1_CTRL 0x00
+# define DSI_CTRL_HS_CLKC_MASK VC4_MASK(15, 14)
+# define DSI_CTRL_HS_CLKC_SHIFT 14
+# define DSI_CTRL_HS_CLKC_BYTE 0
+# define DSI_CTRL_HS_CLKC_DDR2 1
+# define DSI_CTRL_HS_CLKC_DDR 2
+
+# define DSI_CTRL_RX_LPDT_EOT_DISABLE BIT(13)
+# define DSI_CTRL_LPDT_EOT_DISABLE BIT(12)
+# define DSI_CTRL_HSDT_EOT_DISABLE BIT(11)
+# define DSI_CTRL_SOFT_RESET_CFG BIT(10)
+# define DSI_CTRL_CAL_BYTE BIT(9)
+# define DSI_CTRL_INV_BYTE BIT(8)
+# define DSI_CTRL_CLR_LDF BIT(7)
+# define DSI0_CTRL_CLR_PBCF BIT(6)
+# define DSI1_CTRL_CLR_RXF BIT(6)
+# define DSI0_CTRL_CLR_CPBCF BIT(5)
+# define DSI1_CTRL_CLR_PDF BIT(5)
+# define DSI0_CTRL_CLR_PDF BIT(4)
+# define DSI1_CTRL_CLR_CDF BIT(4)
+# define DSI0_CTRL_CLR_CDF BIT(3)
+# define DSI0_CTRL_CTRL2 BIT(2)
+# define DSI1_CTRL_DISABLE_DISP_CRCC BIT(2)
+# define DSI0_CTRL_CTRL1 BIT(1)
+# define DSI1_CTRL_DISABLE_DISP_ECCC BIT(1)
+# define DSI0_CTRL_CTRL0 BIT(0)
+# define DSI1_CTRL_EN BIT(0)
+# define DSI0_CTRL_RESET_FIFOS (DSI_CTRL_CLR_LDF | \
+ DSI0_CTRL_CLR_PBCF | \
+ DSI0_CTRL_CLR_CPBCF | \
+ DSI0_CTRL_CLR_PDF | \
+ DSI0_CTRL_CLR_CDF)
+# define DSI1_CTRL_RESET_FIFOS (DSI_CTRL_CLR_LDF | \
+ DSI1_CTRL_CLR_RXF | \
+ DSI1_CTRL_CLR_PDF | \
+ DSI1_CTRL_CLR_CDF)
+
+#define DSI1_TXPKT2C 0x0c
+#define DSI1_TXPKT2H 0x10
+#define DSI1_TXPKT_PIX_FIFO 0x20
+#define DSI1_RXPKT_FIFO 0x24
+#define DSI1_DISP0_CTRL 0x28
+#define DSI1_INT_STAT 0x30
+#define DSI1_INT_EN 0x34
+/* State reporting bits. These mostly behave like INT_STAT, where
+ * writing a 1 clears the bit.
+ */
+#define DSI1_STAT 0x38
+# define DSI1_STAT_PHY_D3_ULPS BIT(31)
+# define DSI1_STAT_PHY_D3_STOP BIT(30)
+# define DSI1_STAT_PHY_D2_ULPS BIT(29)
+# define DSI1_STAT_PHY_D2_STOP BIT(28)
+# define DSI1_STAT_PHY_D1_ULPS BIT(27)
+# define DSI1_STAT_PHY_D1_STOP BIT(26)
+# define DSI1_STAT_PHY_D0_ULPS BIT(25)
+# define DSI1_STAT_PHY_D0_STOP BIT(24)
+# define DSI1_STAT_FIFO_ERR BIT(23)
+# define DSI1_STAT_PHY_RXLPDT BIT(22)
+# define DSI1_STAT_PHY_RXTRIG BIT(21)
+# define DSI1_STAT_PHY_D0_LPDT BIT(20)
+/* Set when in forward direction */
+# define DSI1_STAT_PHY_DIR BIT(19)
+# define DSI1_STAT_PHY_CLOCK_ULPS BIT(18)
+# define DSI1_STAT_PHY_CLOCK_HS BIT(17)
+# define DSI1_STAT_PHY_CLOCK_STOP BIT(16)
+# define DSI1_STAT_PR_TO BIT(15)
+# define DSI1_STAT_TA_TO BIT(14)
+# define DSI1_STAT_LPRX_TO BIT(13)
+# define DSI1_STAT_HSTX_TO BIT(12)
+# define DSI1_STAT_ERR_CONT_LP1 BIT(11)
+# define DSI1_STAT_ERR_CONT_LP0 BIT(10)
+# define DSI1_STAT_ERR_CONTROL BIT(9)
+# define DSI1_STAT_ERR_SYNC_ESC BIT(8)
+# define DSI1_STAT_RXPKT2 BIT(7)
+# define DSI1_STAT_RXPKT1 BIT(6)
+# define DSI1_STAT_TXPKT2_BUSY BIT(5)
+# define DSI1_STAT_TXPKT2_DONE BIT(4)
+# define DSI1_STAT_TXPKT2_END BIT(3)
+# define DSI1_STAT_TXPKT1_BUSY BIT(2)
+# define DSI1_STAT_TXPKT1_DONE BIT(1)
+# define DSI1_STAT_TXPKT1_END BIT(0)
+
+#define DSI1_HSTX_TO_CNT 0x3c
+#define DSI1_LPRX_TO_CNT 0x40
+#define DSI1_TA_TO_CNT 0x44
+#define DSI1_PR_TO_CNT 0x48
+#define DSI1_PHYC 0x4c
+
+#define DSI1_HS_CLT0 0x50
+# define DSI_HS_CLT0_CZERO_MASK VC4_MASK(26, 18)
+# define DSI_HS_CLT0_CZERO_SHIFT 18
+# define DSI_HS_CLT0_CPRE_MASK VC4_MASK(17, 9)
+# define DSI_HS_CLT0_CPRE_SHIFT 9
+# define DSI_HS_CLT0_CPREP_MASK VC4_MASK(8, 0)
+# define DSI_HS_CLT0_CPREP_SHIFT 0
+
+#define DSI1_HS_CLT1 0x54
+# define DSI_HS_CLT1_CTRAIL_MASK VC4_MASK(17, 9)
+# define DSI_HS_CLT1_CTRAIL_SHIFT 9
+# define DSI_HS_CLT1_CPOST_MASK VC4_MASK(8, 0)
+# define DSI_HS_CLT1_CPOST_SHIFT 0
+
+#define DSI1_HS_CLT2 0x58
+# define DSI_HS_CLT2_WUP_MASK VC4_MASK(23, 0)
+# define DSI_HS_CLT2_WUP_SHIFT 0
+
+#define DSI1_HS_DLT3 0x5c
+# define DSI_HS_DLT3_EXIT_MASK VC4_MASK(26, 18)
+# define DSI_HS_DLT3_EXIT_SHIFT 18
+# define DSI_HS_DLT3_ZERO_MASK VC4_MASK(17, 9)
+# define DSI_HS_DLT3_ZERO_SHIFT 9
+# define DSI_HS_DLT3_PRE_MASK VC4_MASK(8, 0)
+# define DSI_HS_DLT3_PRE_SHIFT 0
+
+#define DSI1_HS_DLT4 0x60
+# define DSI_HS_DLT4_ANLAT_MASK VC4_MASK(22, 18)
+# define DSI_HS_DLT4_ANLAT_SHIFT 18
+# define DSI_HS_DLT4_TRAIL_MASK VC4_MASK(17, 9)
+# define DSI_HS_DLT4_TRAIL_SHIFT 9
+# define DSI_HS_DLT4_LPX_MASK VC4_MASK(8, 0)
+# define DSI_HS_DLT4_LPX_SHIFT 0
+
+#define DSI1_HS_DLT5 0x64
+# define DSI_HS_DLT5_INIT_MASK VC4_MASK(23, 0)
+# define DSI_HS_DLT5_INIT_SHIFT 0
+
+#define DSI1_HS_DLT6 0x68
+# define DSI_HS_DLT6_TA_GET_MASK VC4_MASK(31, 24)
+# define DSI_HS_DLT6_TA_GET_SHIFT 24
+# define DSI_HS_DLT6_TA_SURE_MASK VC4_MASK(23, 16)
+# define DSI_HS_DLT6_TA_SURE_SHIFT 16
+# define DSI_HS_DLT6_TA_GO_MASK VC4_MASK(15, 8)
+# define DSI_HS_DLT6_TA_GO_SHIFT 8
+# define DSI_HS_DLT6_LP_LPX_MASK VC4_MASK(7, 0)
+# define DSI_HS_DLT6_LP_LPX_SHIFT 0
+
+#define DSI1_HS_DLT7 0x6c
+# define DSI_HS_DLT7_LP_WUP_MASK VC4_MASK(23, 0)
+# define DSI_HS_DLT7_LP_WUP_SHIFT 0
+
+#define DSI1_PHY_AFEC0 0x70
+
+#define DSI1_PHY_AFEC1 0x74
+# define DSI1_PHY_AFEC1_ACTRL_DLANE3_MASK VC4_MASK(19, 16)
+# define DSI1_PHY_AFEC1_ACTRL_DLANE3_SHIFT 16
+# define DSI1_PHY_AFEC1_ACTRL_DLANE2_MASK VC4_MASK(15, 12)
+# define DSI1_PHY_AFEC1_ACTRL_DLANE2_SHIFT 12
+# define DSI1_PHY_AFEC1_ACTRL_DLANE1_MASK VC4_MASK(11, 8)
+# define DSI1_PHY_AFEC1_ACTRL_DLANE1_SHIFT 8
+# define DSI1_PHY_AFEC1_ACTRL_DLANE0_MASK VC4_MASK(7, 4)
+# define DSI1_PHY_AFEC1_ACTRL_DLANE0_SHIFT 4
+# define DSI1_PHY_AFEC1_ACTRL_CLANE_MASK VC4_MASK(3, 0)
+# define DSI1_PHY_AFEC1_ACTRL_CLANE_SHIFT 0
+
+#define DSI1_TST_SEL 0x78
+#define DSI1_TST_MON 0x7c
+#define DSI1_PHY_TST1 0x80
+#define DSI1_PHY_TST2 0x84
+#define DSI1_PHY_FIFO_STAT 0x88
+/* Actually, all registers in the range that aren't otherwise claimed
+ * will return the ID.
+ */
+#define DSI1_ID 0x8c
+
+/* General DSI hardware state. */
+struct vc4_dsi {
+ struct platform_device *pdev;
+
+ struct mipi_dsi_host dsi_host;
+ struct drm_encoder *encoder;
+ struct drm_bridge *bridge;
+
+ void __iomem *regs;
+
+ struct dma_chan *reg_dma_chan;
+ dma_addr_t reg_dma_paddr;
+ u32 *reg_dma_mem;
+ dma_addr_t reg_paddr;
+
+ /* Whether we're on bcm2835's DSI0 or DSI1. */
+ int port;
+
+ /* DSI channel for the panel we're connected to. */
+ u32 channel;
+ u32 lanes;
+ u32 format;
+ u32 divider;
+ u32 mode_flags;
+
+ /* Input clock from CPRMAN to the digital PHY, for the DSI
+ * escape clock.
+ */
+ struct clk *escape_clock;
+
+ /* Input clock to the analog PHY, used to generate the DSI bit
+ * clock.
+ */
+ struct clk *pll_phy_clock;
+
+ /* HS Clocks generated within the DSI analog PHY. */
+ struct clk_fixed_factor phy_clocks[3];
+
+ struct clk_hw_onecell_data *clk_onecell;
+
+ /* Pixel clock output to the pixelvalve, generated from the HS
+ * clock.
+ */
+ struct clk *pixel_clock;
+
+ struct completion xfer_completion;
+ int xfer_result;
+};
+
+#define host_to_dsi(host) container_of(host, struct vc4_dsi, dsi_host)
+
+static inline void
+dsi_dma_workaround_write(struct vc4_dsi *dsi, u32 offset, u32 val)
+{
+ struct dma_chan *chan = dsi->reg_dma_chan;
+ struct dma_async_tx_descriptor *tx;
+ dma_cookie_t cookie;
+ int ret;
+
+ /* DSI0 should be able to write normally. */
+ if (!chan) {
+ writel(val, dsi->regs + offset);
+ return;
+ }
+
+ *dsi->reg_dma_mem = val;
+
+ tx = chan->device->device_prep_dma_memcpy(chan,
+ dsi->reg_paddr + offset,
+ dsi->reg_dma_paddr,
+ 4, 0);
+ if (!tx) {
+ DRM_ERROR("Failed to set up DMA register write\n");
+ return;
+ }
+
+ cookie = tx->tx_submit(tx);
+ ret = dma_submit_error(cookie);
+ if (ret) {
+ DRM_ERROR("Failed to submit DMA: %d\n", ret);
+ return;
+ }
+ ret = dma_sync_wait(chan, cookie);
+ if (ret)
+ DRM_ERROR("Failed to wait for DMA: %d\n", ret);
+}
+
+#define DSI_READ(offset) readl(dsi->regs + (offset))
+#define DSI_WRITE(offset, val) dsi_dma_workaround_write(dsi, offset, val)
+#define DSI_PORT_READ(offset) \
+ DSI_READ(dsi->port ? DSI1_##offset : DSI0_##offset)
+#define DSI_PORT_WRITE(offset, val) \
+ DSI_WRITE(dsi->port ? DSI1_##offset : DSI0_##offset, val)
+#define DSI_PORT_BIT(bit) (dsi->port ? DSI1_##bit : DSI0_##bit)
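+
+/* For reference, on DSI1 (dsi->port == 1) these per-port accessors
+ * resolve to, e.g.:
+ *
+ *   DSI_PORT_READ(PHYC)            -> readl(dsi->regs + DSI1_PHYC)
+ *   DSI_PORT_WRITE(PHYC, val)      -> dsi_dma_workaround_write(dsi, DSI1_PHYC, val)
+ *   DSI_PORT_BIT(CTRL_RESET_FIFOS) -> DSI1_CTRL_RESET_FIFOS
+ */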
+
+/* VC4 DSI encoder KMS struct */
+struct vc4_dsi_encoder {
+ struct vc4_encoder base;
+ struct vc4_dsi *dsi;
+};
+
+static inline struct vc4_dsi_encoder *
+to_vc4_dsi_encoder(struct drm_encoder *encoder)
+{
+ return container_of(encoder, struct vc4_dsi_encoder, base.base);
+}
+
+#define DSI_REG(reg) { reg, #reg }
+static const struct {
+ u32 reg;
+ const char *name;
+} dsi0_regs[] = {
+ DSI_REG(DSI0_CTRL),
+ DSI_REG(DSI0_STAT),
+ DSI_REG(DSI0_HSTX_TO_CNT),
+ DSI_REG(DSI0_LPRX_TO_CNT),
+ DSI_REG(DSI0_TA_TO_CNT),
+ DSI_REG(DSI0_PR_TO_CNT),
+ DSI_REG(DSI0_DISP0_CTRL),
+ DSI_REG(DSI0_DISP1_CTRL),
+ DSI_REG(DSI0_INT_STAT),
+ DSI_REG(DSI0_INT_EN),
+ DSI_REG(DSI0_PHYC),
+ DSI_REG(DSI0_HS_CLT0),
+ DSI_REG(DSI0_HS_CLT1),
+ DSI_REG(DSI0_HS_CLT2),
+ DSI_REG(DSI0_HS_DLT3),
+ DSI_REG(DSI0_HS_DLT4),
+ DSI_REG(DSI0_HS_DLT5),
+ DSI_REG(DSI0_HS_DLT6),
+ DSI_REG(DSI0_HS_DLT7),
+ DSI_REG(DSI0_PHY_AFEC0),
+ DSI_REG(DSI0_PHY_AFEC1),
+ DSI_REG(DSI0_ID),
+};
+
+static const struct {
+ u32 reg;
+ const char *name;
+} dsi1_regs[] = {
+ DSI_REG(DSI1_CTRL),
+ DSI_REG(DSI1_STAT),
+ DSI_REG(DSI1_HSTX_TO_CNT),
+ DSI_REG(DSI1_LPRX_TO_CNT),
+ DSI_REG(DSI1_TA_TO_CNT),
+ DSI_REG(DSI1_PR_TO_CNT),
+ DSI_REG(DSI1_DISP0_CTRL),
+ DSI_REG(DSI1_DISP1_CTRL),
+ DSI_REG(DSI1_INT_STAT),
+ DSI_REG(DSI1_INT_EN),
+ DSI_REG(DSI1_PHYC),
+ DSI_REG(DSI1_HS_CLT0),
+ DSI_REG(DSI1_HS_CLT1),
+ DSI_REG(DSI1_HS_CLT2),
+ DSI_REG(DSI1_HS_DLT3),
+ DSI_REG(DSI1_HS_DLT4),
+ DSI_REG(DSI1_HS_DLT5),
+ DSI_REG(DSI1_HS_DLT6),
+ DSI_REG(DSI1_HS_DLT7),
+ DSI_REG(DSI1_PHY_AFEC0),
+ DSI_REG(DSI1_PHY_AFEC1),
+ DSI_REG(DSI1_ID),
+};
+
+static void vc4_dsi_dump_regs(struct vc4_dsi *dsi)
+{
+ int i;
+
+ if (dsi->port == 0) {
+ for (i = 0; i < ARRAY_SIZE(dsi0_regs); i++) {
+ DRM_INFO("0x%04x (%s): 0x%08x\n",
+ dsi0_regs[i].reg, dsi0_regs[i].name,
+ DSI_READ(dsi0_regs[i].reg));
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(dsi1_regs); i++) {
+ DRM_INFO("0x%04x (%s): 0x%08x\n",
+ dsi1_regs[i].reg, dsi1_regs[i].name,
+ DSI_READ(dsi1_regs[i].reg));
+ }
+ }
+}
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *drm = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ int dsi_index = (uintptr_t)node->info_ent->data;
+ struct vc4_dsi *dsi = (dsi_index == 1 ? vc4->dsi1 : NULL);
+ int i;
+
+ if (!dsi)
+ return 0;
+
+ if (dsi->port == 0) {
+ for (i = 0; i < ARRAY_SIZE(dsi0_regs); i++) {
+ seq_printf(m, "0x%04x (%s): 0x%08x\n",
+ dsi0_regs[i].reg, dsi0_regs[i].name,
+ DSI_READ(dsi0_regs[i].reg));
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(dsi1_regs); i++) {
+ seq_printf(m, "0x%04x (%s): 0x%08x\n",
+ dsi1_regs[i].reg, dsi1_regs[i].name,
+ DSI_READ(dsi1_regs[i].reg));
+ }
+ }
+
+ return 0;
+}
+#endif
+
+static void vc4_dsi_encoder_destroy(struct drm_encoder *encoder)
+{
+ drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs vc4_dsi_encoder_funcs = {
+ .destroy = vc4_dsi_encoder_destroy,
+};
+
+static void vc4_dsi_latch_ulps(struct vc4_dsi *dsi, bool latch)
+{
+ u32 afec0 = DSI_PORT_READ(PHY_AFEC0);
+
+ if (latch)
+ afec0 |= DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS);
+ else
+ afec0 &= ~DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS);
+
+ DSI_PORT_WRITE(PHY_AFEC0, afec0);
+}
+
+/* Enters or exits Ultra Low Power State. */
+static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps)
+{
+ bool non_continuous = dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS;
+ u32 phyc_ulps = ((non_continuous ? DSI_PORT_BIT(PHYC_CLANE_ULPS) : 0) |
+ DSI_PHYC_DLANE0_ULPS |
+ (dsi->lanes > 1 ? DSI_PHYC_DLANE1_ULPS : 0) |
+ (dsi->lanes > 2 ? DSI_PHYC_DLANE2_ULPS : 0) |
+ (dsi->lanes > 3 ? DSI_PHYC_DLANE3_ULPS : 0));
+ u32 stat_ulps = ((non_continuous ? DSI1_STAT_PHY_CLOCK_ULPS : 0) |
+ DSI1_STAT_PHY_D0_ULPS |
+ (dsi->lanes > 1 ? DSI1_STAT_PHY_D1_ULPS : 0) |
+ (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_ULPS : 0) |
+ (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_ULPS : 0));
+ u32 stat_stop = ((non_continuous ? DSI1_STAT_PHY_CLOCK_STOP : 0) |
+ DSI1_STAT_PHY_D0_STOP |
+ (dsi->lanes > 1 ? DSI1_STAT_PHY_D1_STOP : 0) |
+ (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) |
+ (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_STOP : 0));
+ int ret;
+ bool ulps_currently_enabled = (DSI_PORT_READ(PHY_AFEC0) &
+ DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS));
+
+ if (ulps == ulps_currently_enabled)
+ return;
+
+ DSI_PORT_WRITE(STAT, stat_ulps);
+ DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps);
+ ret = wait_for((DSI_PORT_READ(STAT) & stat_ulps) == stat_ulps, 200);
+ if (ret) {
+ dev_warn(&dsi->pdev->dev,
+ "Timeout waiting for DSI ULPS entry: STAT 0x%08x",
+ DSI_PORT_READ(STAT));
+ DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps);
+ vc4_dsi_latch_ulps(dsi, false);
+ return;
+ }
+
+ /* The DSI module can't be disabled while the module is
+ * generating ULPS state. So, to be able to disable the
+ * module, we have the AFE latch the ULPS state and continue
+ * on to having the module enter STOP.
+ */
+ vc4_dsi_latch_ulps(dsi, ulps);
+
+ DSI_PORT_WRITE(STAT, stat_stop);
+ DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps);
+ ret = wait_for((DSI_PORT_READ(STAT) & stat_stop) == stat_stop, 200);
+ if (ret) {
+ dev_warn(&dsi->pdev->dev,
+ "Timeout waiting for DSI STOP entry: STAT 0x%08x",
+ DSI_PORT_READ(STAT));
+ DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps);
+ return;
+ }
+}
+
+static u32
+dsi_hs_timing(u32 ui_ns, u32 ns, u32 ui)
+{
+ /* The HS timings have to be rounded up to a multiple of 8
+ * because we're using the byte clock.
+ */
+ return roundup(ui + DIV_ROUND_UP(ns, ui_ns), 8);
+}
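+
+/* Worked example with an illustrative (not measured) 500 MHz HS clock:
+ * the enable path below computes ui_ns = DIV_ROUND_UP(500000000, hs_clock)
+ * = 1 ns, so the CPOST field it programs is
+ * dsi_hs_timing(1, 60, 52) = roundup(52 + 60, 8) = 112 unit intervals.
+ */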
+
+/* ESC always runs at 100 MHz. */
+#define ESC_TIME_NS 10
+
+static u32
+dsi_esc_timing(u32 ns)
+{
+ return DIV_ROUND_UP(ns, ESC_TIME_NS);
+}
+
+static void vc4_dsi_encoder_disable(struct drm_encoder *encoder)
+{
+ struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder);
+ struct vc4_dsi *dsi = vc4_encoder->dsi;
+ struct device *dev = &dsi->pdev->dev;
+
+ drm_bridge_disable(dsi->bridge);
+ vc4_dsi_ulps(dsi, true);
+ drm_bridge_post_disable(dsi->bridge);
+
+ clk_disable_unprepare(dsi->pll_phy_clock);
+ clk_disable_unprepare(dsi->escape_clock);
+ clk_disable_unprepare(dsi->pixel_clock);
+
+ pm_runtime_put(dev);
+}
+
+/* Extends the mode's blank intervals to handle BCM2835's integer-only
+ * DSI PLL divider.
+ *
+ * On 2835, PLLD is set to 2 GHz, and may not be changed by the display
+ * driver since most peripherals are hanging off of the PLLD_PER
+ * divider. PLLD_DSI1, which drives our DSI bit clock (and therefore
+ * the pixel clock), only has an integer divider off of DSI.
+ *
+ * To get our panel mode to refresh at the expected 60Hz, we need to
+ * extend the horizontal blank time. This means we drive a
+ * higher-than-expected clock rate to the panel, but that's what the
+ * firmware does too.
+ */
+static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder);
+ struct vc4_dsi *dsi = vc4_encoder->dsi;
+ struct clk *phy_parent = clk_get_parent(dsi->pll_phy_clock);
+ unsigned long parent_rate = clk_get_rate(phy_parent);
+ unsigned long pixel_clock_hz = mode->clock * 1000;
+ unsigned long pll_clock = pixel_clock_hz * dsi->divider;
+ int divider;
+
+ /* Find the largest divider that still gives a PLL rate at least
+ * as fast as the requested one (pixel clock * dsi->divider).
+ */
+ for (divider = 1; divider < 8; divider++) {
+ if (parent_rate / divider < pll_clock) {
+ divider--;
+ break;
+ }
+ }
+
+ /* Now that we've picked a PLL divider, calculate back to its
+ * pixel clock.
+ */
+ pll_clock = parent_rate / divider;
+ pixel_clock_hz = pll_clock / dsi->divider;
+
+ adjusted_mode->clock = pixel_clock_hz / 1000;
+
+ /* Given the new pixel clock, adjust HFP to keep vrefresh the same. */
+ adjusted_mode->htotal = adjusted_mode->clock * mode->htotal /
+ mode->clock;
+ adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal;
+ adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal;
+
+ return true;
+}
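+
+/* Illustrative numbers (not a real panel): with parent_rate = 2 GHz,
+ * dsi->divider = 24 (RGB888 over one lane) and a requested 30 MHz pixel
+ * clock, pll_clock = 720 MHz.  The loop stops at divider = 3
+ * (2 GHz / 3 < 720 MHz) and backs off to 2, so pll_clock becomes 1 GHz,
+ * the adjusted pixel clock ~41.7 MHz, and htotal is stretched by the
+ * same ratio so the refresh rate is unchanged.
+ */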
+
+static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
+{
+ struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
+ struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder);
+ struct vc4_dsi *dsi = vc4_encoder->dsi;
+ struct device *dev = &dsi->pdev->dev;
+ bool debug_dump_regs = false;
+ unsigned long hs_clock;
+ u32 ui_ns;
+ /* Minimum LP state duration in escape clock cycles. */
+ u32 lpx = dsi_esc_timing(60);
+ unsigned long pixel_clock_hz = mode->clock * 1000;
+ unsigned long dsip_clock;
+ unsigned long phy_clock;
+ int ret;
+
+ ret = pm_runtime_get_sync(dev);
+ if (ret) {
+ DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->port);
+ return;
+ }
+
+ if (debug_dump_regs) {
+ DRM_INFO("DSI regs before:\n");
+ vc4_dsi_dump_regs(dsi);
+ }
+
+ /* Round up the clk_set_rate() request slightly, since
+ * PLLD_DSI1 is an integer divider and its rate selection will
+ * never round up.
+ */
+ phy_clock = (pixel_clock_hz + 1000) * dsi->divider;
+ ret = clk_set_rate(dsi->pll_phy_clock, phy_clock);
+ if (ret) {
+ dev_err(&dsi->pdev->dev,
+ "Failed to set phy clock to %ld: %d\n", phy_clock, ret);
+ }
+
+ /* Reset the DSI and all its fifos. */
+ DSI_PORT_WRITE(CTRL,
+ DSI_CTRL_SOFT_RESET_CFG |
+ DSI_PORT_BIT(CTRL_RESET_FIFOS));
+
+ DSI_PORT_WRITE(CTRL,
+ DSI_CTRL_HSDT_EOT_DISABLE |
+ DSI_CTRL_RX_LPDT_EOT_DISABLE);
+
+ /* Clear all stat bits so we see what has happened during enable. */
+ DSI_PORT_WRITE(STAT, DSI_PORT_READ(STAT));
+
+ /* Set AFE CTR00/CTR1 to release powerdown of analog. */
+ if (dsi->port == 0) {
+ u32 afec0 = (VC4_SET_FIELD(7, DSI_PHY_AFEC0_PTATADJ) |
+ VC4_SET_FIELD(7, DSI_PHY_AFEC0_CTATADJ));
+
+ if (dsi->lanes < 2)
+ afec0 |= DSI0_PHY_AFEC0_PD_DLANE1;
+
+ if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO))
+ afec0 |= DSI0_PHY_AFEC0_RESET;
+
+ DSI_PORT_WRITE(PHY_AFEC0, afec0);
+
+ DSI_PORT_WRITE(PHY_AFEC1,
+ VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_DLANE1) |
+ VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_DLANE0) |
+ VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_CLANE));
+ } else {
+ u32 afec0 = (VC4_SET_FIELD(7, DSI_PHY_AFEC0_PTATADJ) |
+ VC4_SET_FIELD(7, DSI_PHY_AFEC0_CTATADJ) |
+ VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_CLANE) |
+ VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE0) |
+ VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE1) |
+ VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE2) |
+ VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE3));
+
+ if (dsi->lanes < 4)
+ afec0 |= DSI1_PHY_AFEC0_PD_DLANE3;
+ if (dsi->lanes < 3)
+ afec0 |= DSI1_PHY_AFEC0_PD_DLANE2;
+ if (dsi->lanes < 2)
+ afec0 |= DSI1_PHY_AFEC0_PD_DLANE1;
+
+ afec0 |= DSI1_PHY_AFEC0_RESET;
+
+ DSI_PORT_WRITE(PHY_AFEC0, afec0);
+
+ DSI_PORT_WRITE(PHY_AFEC1, 0);
+
+ /* AFEC reset hold time */
+ mdelay(1);
+ }
+
+ ret = clk_prepare_enable(dsi->escape_clock);
+ if (ret) {
+ DRM_ERROR("Failed to turn on DSI escape clock: %d\n", ret);
+ return;
+ }
+
+ ret = clk_prepare_enable(dsi->pll_phy_clock);
+ if (ret) {
+ DRM_ERROR("Failed to turn on DSI PLL: %d\n", ret);
+ return;
+ }
+
+ hs_clock = clk_get_rate(dsi->pll_phy_clock);
+
+ /* Yes, we set the DSI0P/DSI1P pixel clock to the byte rate,
+ * not the pixel clock rate. DSIxP takes from the APHY's byte,
+ * DDR2, or DDR4 clock (we use byte) and feeds into the PV at
+ * that rate. Separately, a value derived from PIX_CLK_DIV
+ * and HS_CLKC is fed into the PV to divide down to the actual
+ * pixel clock for pushing pixels into DSI.
+ */
+ dsip_clock = phy_clock / 8;
+ ret = clk_set_rate(dsi->pixel_clock, dsip_clock);
+ if (ret) {
+ dev_err(dev, "Failed to set pixel clock to %ldHz: %d\n",
+ dsip_clock, ret);
+ }
+
+ ret = clk_prepare_enable(dsi->pixel_clock);
+ if (ret) {
+ DRM_ERROR("Failed to turn on DSI pixel clock: %d\n", ret);
+ return;
+ }
+
+ /* How many ns one DSI unit interval is. Note that the clock
+ * is DDR, so there's an extra divide by 2.
+ */
+ ui_ns = DIV_ROUND_UP(500000000, hs_clock);
+
+ DSI_PORT_WRITE(HS_CLT0,
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 262, 0),
+ DSI_HS_CLT0_CZERO) |
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 0, 8),
+ DSI_HS_CLT0_CPRE) |
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 38, 0),
+ DSI_HS_CLT0_CPREP));
+
+ DSI_PORT_WRITE(HS_CLT1,
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 60, 0),
+ DSI_HS_CLT1_CTRAIL) |
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 60, 52),
+ DSI_HS_CLT1_CPOST));
+
+ DSI_PORT_WRITE(HS_CLT2,
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 1000000, 0),
+ DSI_HS_CLT2_WUP));
+
+ DSI_PORT_WRITE(HS_DLT3,
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 100, 0),
+ DSI_HS_DLT3_EXIT) |
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 105, 6),
+ DSI_HS_DLT3_ZERO) |
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, 40, 4),
+ DSI_HS_DLT3_PRE));
+
+ DSI_PORT_WRITE(HS_DLT4,
+ VC4_SET_FIELD(dsi_hs_timing(ui_ns, lpx * ESC_TIME_NS, 0),
+ DSI_HS_DLT4_LPX) |
+ VC4_SET_FIELD(max(dsi_hs_timing(ui_ns, 0, 8),
+ dsi_hs_timing(ui_ns, 60, 4)),
+ DSI_HS_DLT4_TRAIL) |
+ VC4_SET_FIELD(0, DSI_HS_DLT4_ANLAT));
+
+ /* T_INIT is how long STOP is driven after power-up to
+ * indicate to the slave (also coming out of power-up) that
+ * master init is complete, and should be greater than the
+ * maximum of two values: T_INIT,MASTER and T_INIT,SLAVE. The
+ * D-PHY spec gives a minimum 100us for T_INIT,MASTER and
+ * T_INIT,SLAVE, while allowing protocols on top of it to give
+ * greater minimums. The vc4 firmware uses an extremely
+ * conservative 5ms, and we maintain that here.
+ */
+ DSI_PORT_WRITE(HS_DLT5, VC4_SET_FIELD(dsi_hs_timing(ui_ns,
+ 5 * 1000 * 1000, 0),
+ DSI_HS_DLT5_INIT));
+
+ DSI_PORT_WRITE(HS_DLT6,
+ VC4_SET_FIELD(lpx * 5, DSI_HS_DLT6_TA_GET) |
+ VC4_SET_FIELD(lpx, DSI_HS_DLT6_TA_SURE) |
+ VC4_SET_FIELD(lpx * 4, DSI_HS_DLT6_TA_GO) |
+ VC4_SET_FIELD(lpx, DSI_HS_DLT6_LP_LPX));
+
+ DSI_PORT_WRITE(HS_DLT7,
+ VC4_SET_FIELD(dsi_esc_timing(1000000),
+ DSI_HS_DLT7_LP_WUP));
+
+ DSI_PORT_WRITE(PHYC,
+ DSI_PHYC_DLANE0_ENABLE |
+ (dsi->lanes >= 2 ? DSI_PHYC_DLANE1_ENABLE : 0) |
+ (dsi->lanes >= 3 ? DSI_PHYC_DLANE2_ENABLE : 0) |
+ (dsi->lanes >= 4 ? DSI_PHYC_DLANE3_ENABLE : 0) |
+ DSI_PORT_BIT(PHYC_CLANE_ENABLE) |
+ ((dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) ?
+ 0 : DSI_PORT_BIT(PHYC_HS_CLK_CONTINUOUS)) |
+ (dsi->port == 0 ?
+ VC4_SET_FIELD(lpx - 1, DSI0_PHYC_ESC_CLK_LPDT) :
+ VC4_SET_FIELD(lpx - 1, DSI1_PHYC_ESC_CLK_LPDT)));
+
+ DSI_PORT_WRITE(CTRL,
+ DSI_PORT_READ(CTRL) |
+ DSI_CTRL_CAL_BYTE);
+
+ /* HS timeout in HS clock cycles: disabled. */
+ DSI_PORT_WRITE(HSTX_TO_CNT, 0);
+ /* LP receive timeout in HS clocks. */
+ DSI_PORT_WRITE(LPRX_TO_CNT, 0xffffff);
+ /* Bus turnaround timeout */
+ DSI_PORT_WRITE(TA_TO_CNT, 100000);
+ /* Display reset sequence timeout */
+ DSI_PORT_WRITE(PR_TO_CNT, 100000);
+
+ /* Set up DISP1 for transferring long command payloads through
+ * the pixfifo.
+ */
+ DSI_PORT_WRITE(DISP1_CTRL,
+ VC4_SET_FIELD(DSI_DISP1_PFORMAT_32BIT_LE,
+ DSI_DISP1_PFORMAT) |
+ DSI_DISP1_ENABLE);
+
+ /* Ungate the block. */
+ if (dsi->port == 0)
+ DSI_PORT_WRITE(CTRL, DSI_PORT_READ(CTRL) | DSI0_CTRL_CTRL0);
+ else
+ DSI_PORT_WRITE(CTRL, DSI_PORT_READ(CTRL) | DSI1_CTRL_EN);
+
+ /* Bring AFE out of reset. */
+ if (dsi->port == 0) {
+ } else {
+ DSI_PORT_WRITE(PHY_AFEC0,
+ DSI_PORT_READ(PHY_AFEC0) &
+ ~DSI1_PHY_AFEC0_RESET);
+ }
+
+ vc4_dsi_ulps(dsi, false);
+
+ drm_bridge_pre_enable(dsi->bridge);
+
+ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
+ DSI_PORT_WRITE(DISP0_CTRL,
+ VC4_SET_FIELD(dsi->divider,
+ DSI_DISP0_PIX_CLK_DIV) |
+ VC4_SET_FIELD(dsi->format, DSI_DISP0_PFORMAT) |
+ VC4_SET_FIELD(DSI_DISP0_LP_STOP_PERFRAME,
+ DSI_DISP0_LP_STOP_CTRL) |
+ DSI_DISP0_ST_END |
+ DSI_DISP0_ENABLE);
+ } else {
+ DSI_PORT_WRITE(DISP0_CTRL,
+ DSI_DISP0_COMMAND_MODE |
+ DSI_DISP0_ENABLE);
+ }
+
+ drm_bridge_enable(dsi->bridge);
+
+ if (debug_dump_regs) {
+ DRM_INFO("DSI regs after:\n");
+ vc4_dsi_dump_regs(dsi);
+ }
+}
+
+static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host,
+ const struct mipi_dsi_msg *msg)
+{
+ struct vc4_dsi *dsi = host_to_dsi(host);
+ struct mipi_dsi_packet packet;
+ u32 pkth = 0, pktc = 0;
+ int i, ret;
+ bool is_long = mipi_dsi_packet_format_is_long(msg->type);
+ u32 cmd_fifo_len = 0, pix_fifo_len = 0;
+
+ mipi_dsi_create_packet(&packet, msg);
+
+ pkth |= VC4_SET_FIELD(packet.header[0], DSI_TXPKT1H_BC_DT);
+ pkth |= VC4_SET_FIELD(packet.header[1] |
+ (packet.header[2] << 8),
+ DSI_TXPKT1H_BC_PARAM);
+ if (is_long) {
+ /* Divide data across the various FIFOs we have available.
+ * The command FIFO takes byte-oriented data, but is of
+ * limited size. The pixel FIFO (never actually used for
+ * pixel data in reality) is word oriented, and substantially
+ * larger. So, we use the pixel FIFO for most of the data,
+ * sending the residual bytes in the command FIFO at the start.
+ *
+ * With this arrangement, the command FIFO will never get full.
+ */
+ if (packet.payload_length <= 16) {
+ cmd_fifo_len = packet.payload_length;
+ pix_fifo_len = 0;
+ } else {
+ cmd_fifo_len = (packet.payload_length %
+ DSI_PIX_FIFO_WIDTH);
+ pix_fifo_len = ((packet.payload_length - cmd_fifo_len) /
+ DSI_PIX_FIFO_WIDTH);
+ }
+
+ WARN_ON_ONCE(pix_fifo_len >= DSI_PIX_FIFO_DEPTH);
+
+ pkth |= VC4_SET_FIELD(cmd_fifo_len, DSI_TXPKT1H_BC_CMDFIFO);
+ }
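+
+ /* For example, a hypothetical 27-byte long payload splits into
+ * cmd_fifo_len = 27 % 4 = 3 bytes through the command FIFO and
+ * pix_fifo_len = (27 - 3) / 4 = 6 words through the pixel FIFO.
+ */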
+
+ if (msg->rx_len) {
+ pktc |= VC4_SET_FIELD(DSI_TXPKT1C_CMD_CTRL_RX,
+ DSI_TXPKT1C_CMD_CTRL);
+ } else {
+ pktc |= VC4_SET_FIELD(DSI_TXPKT1C_CMD_CTRL_TX,
+ DSI_TXPKT1C_CMD_CTRL);
+ }
+
+ for (i = 0; i < cmd_fifo_len; i++)
+ DSI_PORT_WRITE(TXPKT_CMD_FIFO, packet.payload[i]);
+ for (i = 0; i < pix_fifo_len; i++) {
+ const u8 *pix = packet.payload + cmd_fifo_len + i * 4;
+
+ DSI_PORT_WRITE(TXPKT_PIX_FIFO,
+ pix[0] |
+ pix[1] << 8 |
+ pix[2] << 16 |
+ pix[3] << 24);
+ }
+
+ if (msg->flags & MIPI_DSI_MSG_USE_LPM)
+ pktc |= DSI_TXPKT1C_CMD_MODE_LP;
+ if (is_long)
+ pktc |= DSI_TXPKT1C_CMD_TYPE_LONG;
+
+ /* Send one copy of the packet. Larger repeats are used for pixel
+ * data in command mode.
+ */
+ pktc |= VC4_SET_FIELD(1, DSI_TXPKT1C_CMD_REPEAT);
+
+ pktc |= DSI_TXPKT1C_CMD_EN;
+ if (pix_fifo_len) {
+ pktc |= VC4_SET_FIELD(DSI_TXPKT1C_DISPLAY_NO_SECONDARY,
+ DSI_TXPKT1C_DISPLAY_NO);
+ } else {
+ pktc |= VC4_SET_FIELD(DSI_TXPKT1C_DISPLAY_NO_SHORT,
+ DSI_TXPKT1C_DISPLAY_NO);
+ }
+
+ /* Enable the appropriate interrupt for the transfer completion. */
+ dsi->xfer_result = 0;
+ reinit_completion(&dsi->xfer_completion);
+ DSI_PORT_WRITE(INT_STAT, DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF);
+ if (msg->rx_len) {
+ DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED |
+ DSI1_INT_PHY_DIR_RTF));
+ } else {
+ DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED |
+ DSI1_INT_TXPKT1_DONE));
+ }
+
+ /* Send the packet. */
+ DSI_PORT_WRITE(TXPKT1H, pkth);
+ DSI_PORT_WRITE(TXPKT1C, pktc);
+
+ if (!wait_for_completion_timeout(&dsi->xfer_completion,
+ msecs_to_jiffies(1000))) {
+ dev_err(&dsi->pdev->dev, "transfer interrupt wait timeout");
+ dev_err(&dsi->pdev->dev, "instat: 0x%08x\n",
+ DSI_PORT_READ(INT_STAT));
+ ret = -ETIMEDOUT;
+ } else {
+ ret = dsi->xfer_result;
+ }
+
+ DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED);
+
+ if (ret)
+ goto reset_fifo_and_return;
+
+ if (ret == 0 && msg->rx_len) {
+ u32 rxpkt1h = DSI_PORT_READ(RXPKT1H);
+ u8 *msg_rx = msg->rx_buf;
+
+ if (rxpkt1h & DSI_RXPKT1H_PKT_TYPE_LONG) {
+ u32 rxlen = VC4_GET_FIELD(rxpkt1h,
+ DSI_RXPKT1H_BC_PARAM);
+
+ if (rxlen != msg->rx_len) {
+ DRM_ERROR("DSI returned %db, expecting %db\n",
+ rxlen, (int)msg->rx_len);
+ ret = -ENXIO;
+ goto reset_fifo_and_return;
+ }
+
+ for (i = 0; i < msg->rx_len; i++)
+ msg_rx[i] = DSI_READ(DSI1_RXPKT_FIFO);
+ } else {
+ /* FINISHME: Handle AWER */
+
+ msg_rx[0] = VC4_GET_FIELD(rxpkt1h,
+ DSI_RXPKT1H_SHORT_0);
+ if (msg->rx_len > 1) {
+ msg_rx[1] = VC4_GET_FIELD(rxpkt1h,
+ DSI_RXPKT1H_SHORT_1);
+ }
+ }
+ }
+
+ return ret;
+
+reset_fifo_and_return:
+ DRM_ERROR("DSI transfer failed, resetting: %d\n", ret);
+
+ DSI_PORT_WRITE(TXPKT1C, DSI_PORT_READ(TXPKT1C) & ~DSI_TXPKT1C_CMD_EN);
+ udelay(1);
+ DSI_PORT_WRITE(CTRL,
+ DSI_PORT_READ(CTRL) |
+ DSI_PORT_BIT(CTRL_RESET_FIFOS));
+
+ DSI_PORT_WRITE(TXPKT1C, 0);
+ DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED);
+ return ret;
+}
+
+static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *device)
+{
+ struct vc4_dsi *dsi = host_to_dsi(host);
+
+ dsi->lanes = device->lanes;
+ dsi->channel = device->channel;
+ dsi->mode_flags = device->mode_flags;
+
+ switch (device->format) {
+ case MIPI_DSI_FMT_RGB888:
+ dsi->format = DSI_PFORMAT_RGB888;
+ dsi->divider = 24 / dsi->lanes;
+ break;
+ case MIPI_DSI_FMT_RGB666:
+ dsi->format = DSI_PFORMAT_RGB666;
+ dsi->divider = 24 / dsi->lanes;
+ break;
+ case MIPI_DSI_FMT_RGB666_PACKED:
+ dsi->format = DSI_PFORMAT_RGB666_PACKED;
+ dsi->divider = 18 / dsi->lanes;
+ break;
+ case MIPI_DSI_FMT_RGB565:
+ dsi->format = DSI_PFORMAT_RGB565;
+ dsi->divider = 16 / dsi->lanes;
+ break;
+ default:
+ dev_err(&dsi->pdev->dev, "Unknown DSI format: %d.\n",
+ dsi->format);
+ return 0;
+ }
+
+ if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO)) {
+ dev_err(&dsi->pdev->dev,
+ "Only VIDEO mode panels supported currently.\n");
+ return 0;
+ }
+
+ return 0;
+}
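+
+/* The divider chosen above is bits-per-pixel / lanes; the enable path
+ * multiplies the pixel clock by it when sizing the pll_phy_clock
+ * request.  For example (hypothetical panel), RGB888 over two lanes
+ * gives divider = 24 / 2 = 12, so a 30 MHz pixel clock asks for a
+ * roughly 12 * 30 MHz = 360 MHz PHY PLL rate.
+ */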
+
+static int vc4_dsi_host_detach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *device)
+{
+ return 0;
+}
+
+static const struct mipi_dsi_host_ops vc4_dsi_host_ops = {
+ .attach = vc4_dsi_host_attach,
+ .detach = vc4_dsi_host_detach,
+ .transfer = vc4_dsi_host_transfer,
+};
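+
+/* A panel or bridge driver bound to this host does not call these ops
+ * directly; it goes through the core mipi_dsi helpers, which end up in
+ * vc4_dsi_host_transfer() via .transfer above.  Sketch, assuming a
+ * mipi_dsi_device *device attached to this host:
+ *
+ *   mipi_dsi_dcs_exit_sleep_mode(device);
+ *   mipi_dsi_dcs_set_display_on(device);
+ */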
+
+static const struct drm_encoder_helper_funcs vc4_dsi_encoder_helper_funcs = {
+ .disable = vc4_dsi_encoder_disable,
+ .enable = vc4_dsi_encoder_enable,
+ .mode_fixup = vc4_dsi_encoder_mode_fixup,
+};
+
+static const struct of_device_id vc4_dsi_dt_match[] = {
+ { .compatible = "brcm,bcm2835-dsi1", (void *)(uintptr_t)1 },
+ {}
+};
+
+static void dsi_handle_error(struct vc4_dsi *dsi,
+ irqreturn_t *ret, u32 stat, u32 bit,
+ const char *type)
+{
+ if (!(stat & bit))
+ return;
+
+ DRM_ERROR("DSI%d: %s error\n", dsi->port, type);
+ *ret = IRQ_HANDLED;
+}
+
+/*
+ * Initial handler for port 1 where we need the reg_dma workaround.
+ * The register DMA writes sleep, so we can't do it in the top half.
+ * Instead we use IRQF_ONESHOT so that the IRQ gets disabled in the
+ * parent interrupt controller until our interrupt thread is done.
+ */
+static irqreturn_t vc4_dsi_irq_defer_to_thread_handler(int irq, void *data)
+{
+ struct vc4_dsi *dsi = data;
+ u32 stat = DSI_PORT_READ(INT_STAT);
+
+ if (!stat)
+ return IRQ_NONE;
+
+ return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Normal IRQ handler for port 0, or the threaded IRQ handler for port
+ * 1 where we need the reg_dma workaround.
+ */
+static irqreturn_t vc4_dsi_irq_handler(int irq, void *data)
+{
+ struct vc4_dsi *dsi = data;
+ u32 stat = DSI_PORT_READ(INT_STAT);
+ irqreturn_t ret = IRQ_NONE;
+
+ DSI_PORT_WRITE(INT_STAT, stat);
+
+ dsi_handle_error(dsi, &ret, stat,
+ DSI1_INT_ERR_SYNC_ESC, "LPDT sync");
+ dsi_handle_error(dsi, &ret, stat,
+ DSI1_INT_ERR_CONTROL, "data lane 0 sequence");
+ dsi_handle_error(dsi, &ret, stat,
+ DSI1_INT_ERR_CONT_LP0, "LP0 contention");
+ dsi_handle_error(dsi, &ret, stat,
+ DSI1_INT_ERR_CONT_LP1, "LP1 contention");
+ dsi_handle_error(dsi, &ret, stat,
+ DSI1_INT_HSTX_TO, "HSTX timeout");
+ dsi_handle_error(dsi, &ret, stat,
+ DSI1_INT_LPRX_TO, "LPRX timeout");
+ dsi_handle_error(dsi, &ret, stat,
+ DSI1_INT_TA_TO, "turnaround timeout");
+ dsi_handle_error(dsi, &ret, stat,
+ DSI1_INT_PR_TO, "peripheral reset timeout");
+
+ if (stat & (DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF)) {
+ complete(&dsi->xfer_completion);
+ ret = IRQ_HANDLED;
+ } else if (stat & DSI1_INT_HSTX_TO) {
+ complete(&dsi->xfer_completion);
+ dsi->xfer_result = -ETIMEDOUT;
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+/**
+ * vc4_dsi_init_phy_clocks - Exposes clocks generated by the analog
+ * PHY that are consumed by CPRMAN (clk-bcm2835.c).
+ * @dsi: DSI encoder
+ */
+static int
+vc4_dsi_init_phy_clocks(struct vc4_dsi *dsi)
+{
+ struct device *dev = &dsi->pdev->dev;
+ const char *parent_name = __clk_get_name(dsi->pll_phy_clock);
+ static const struct {
+ const char *dsi0_name, *dsi1_name;
+ int div;
+ } phy_clocks[] = {
+ { "dsi0_byte", "dsi1_byte", 8 },
+ { "dsi0_ddr2", "dsi1_ddr2", 4 },
+ { "dsi0_ddr", "dsi1_ddr", 2 },
+ };
+ int i;
+
+ dsi->clk_onecell = devm_kzalloc(dev,
+ sizeof(*dsi->clk_onecell) +
+ ARRAY_SIZE(phy_clocks) *
+ sizeof(struct clk_hw *),
+ GFP_KERNEL);
+ if (!dsi->clk_onecell)
+ return -ENOMEM;
+ dsi->clk_onecell->num = ARRAY_SIZE(phy_clocks);
+
+ for (i = 0; i < ARRAY_SIZE(phy_clocks); i++) {
+ struct clk_fixed_factor *fix = &dsi->phy_clocks[i];
+ struct clk_init_data init;
+ int ret;
+
+ /* We just use core fixed factor clock ops for the PHY
+ * clocks. The clocks are actually gated by the
+ * PHY_AFEC0_DDRCLK_EN bits, which we should be
+ * setting if we use the DDR/DDR2 clocks. However,
+ * vc4_dsi_encoder_enable() is setting up both AFEC0,
+ * setting both our parent DSI PLL's rate and this
+ * clock's rate, so it knows if DDR/DDR2 are going to
+ * be used and could enable the gates itself.
+ */
+ fix->mult = 1;
+ fix->div = phy_clocks[i].div;
+ fix->hw.init = &init;
+
+ memset(&init, 0, sizeof(init));
+ init.parent_names = &parent_name;
+ init.num_parents = 1;
+ if (dsi->port == 1)
+ init.name = phy_clocks[i].dsi1_name;
+ else
+ init.name = phy_clocks[i].dsi0_name;
+ init.ops = &clk_fixed_factor_ops;
+
+ ret = devm_clk_hw_register(dev, &fix->hw);
+ if (ret)
+ return ret;
+
+ dsi->clk_onecell->hws[i] = &fix->hw;
+ }
+
+ return of_clk_add_hw_provider(dev->of_node,
+ of_clk_hw_onecell_get,
+ dsi->clk_onecell);
+}
+
+static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_dsi *dsi = dev_get_drvdata(dev);
+ struct vc4_dsi_encoder *vc4_dsi_encoder;
+ struct drm_panel *panel;
+ const struct of_device_id *match;
+ dma_cap_mask_t dma_mask;
+ int ret;
+
+ match = of_match_device(vc4_dsi_dt_match, dev);
+ if (!match)
+ return -ENODEV;
+
+ dsi->port = (uintptr_t)match->data;
+
+ vc4_dsi_encoder = devm_kzalloc(dev, sizeof(*vc4_dsi_encoder),
+ GFP_KERNEL);
+ if (!vc4_dsi_encoder)
+ return -ENOMEM;
+ vc4_dsi_encoder->base.type = VC4_ENCODER_TYPE_DSI1;
+ vc4_dsi_encoder->dsi = dsi;
+ dsi->encoder = &vc4_dsi_encoder->base.base;
+
+ dsi->regs = vc4_ioremap_regs(pdev, 0);
+ if (IS_ERR(dsi->regs))
+ return PTR_ERR(dsi->regs);
+
+ if (DSI_PORT_READ(ID) != DSI_ID_VALUE) {
+ dev_err(dev, "Port returned 0x%08x for ID instead of 0x%08x\n",
+ DSI_PORT_READ(ID), DSI_ID_VALUE);
+ return -ENODEV;
+ }
+
+ /* DSI1 has a broken AXI slave that doesn't respond to writes
+ * from the ARM. It does handle writes from the DMA engine,
+ * so set up a channel for talking to it.
+ */
+ if (dsi->port == 1) {
+ dsi->reg_dma_mem = dma_alloc_coherent(dev, 4,
+ &dsi->reg_dma_paddr,
+ GFP_KERNEL);
+ if (!dsi->reg_dma_mem) {
+ DRM_ERROR("Failed to get DMA memory\n");
+ return -ENOMEM;
+ }
+
+ dma_cap_zero(dma_mask);
+ dma_cap_set(DMA_MEMCPY, dma_mask);
+ dsi->reg_dma_chan = dma_request_chan_by_mask(&dma_mask);
+ if (IS_ERR(dsi->reg_dma_chan)) {
+ ret = PTR_ERR(dsi->reg_dma_chan);
+ if (ret != -EPROBE_DEFER)
+ DRM_ERROR("Failed to get DMA channel: %d\n",
+ ret);
+ return ret;
+ }
+
+ /* Get the physical address of the device's registers. The
+ * struct resource for the regs gives us the bus address
+ * instead.
+ */
+ dsi->reg_paddr = be32_to_cpup(of_get_address(dev->of_node,
+ 0, NULL, NULL));
+ }
+
+ init_completion(&dsi->xfer_completion);
+ /* At startup enable error-reporting interrupts and nothing else. */
+ DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED);
+ /* Clear any existing interrupt state. */
+ DSI_PORT_WRITE(INT_STAT, DSI_PORT_READ(INT_STAT));
+
+ if (dsi->reg_dma_mem)
+ ret = devm_request_threaded_irq(dev, platform_get_irq(pdev, 0),
+ vc4_dsi_irq_defer_to_thread_handler,
+ vc4_dsi_irq_handler,
+ IRQF_ONESHOT,
+ "vc4 dsi", dsi);
+ else
+ ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+ vc4_dsi_irq_handler, 0, "vc4 dsi", dsi);
+ if (ret) {
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "Failed to get interrupt: %d\n", ret);
+ return ret;
+ }
+
+ dsi->escape_clock = devm_clk_get(dev, "escape");
+ if (IS_ERR(dsi->escape_clock)) {
+ ret = PTR_ERR(dsi->escape_clock);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "Failed to get escape clock: %d\n", ret);
+ return ret;
+ }
+
+ dsi->pll_phy_clock = devm_clk_get(dev, "phy");
+ if (IS_ERR(dsi->pll_phy_clock)) {
+ ret = PTR_ERR(dsi->pll_phy_clock);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "Failed to get phy clock: %d\n", ret);
+ return ret;
+ }
+
+ dsi->pixel_clock = devm_clk_get(dev, "pixel");
+ if (IS_ERR(dsi->pixel_clock)) {
+ ret = PTR_ERR(dsi->pixel_clock);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "Failed to get pixel clock: %d\n", ret);
+ return ret;
+ }
+
+ ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
+ &panel, &dsi->bridge);
+ if (ret) {
+ /* If the bridge or panel pointed to by dev->of_node is not
+ * enabled, just return 0 here so that we don't prevent the DRM
+ * dev from being registered. Of course that means the DSI
+ * encoder won't be exposed, but that's not a problem since
+ * nothing is connected to it.
+ */
+ if (ret == -ENODEV)
+ return 0;
+
+ return ret;
+ }
+
+ if (panel) {
+ dsi->bridge = devm_drm_panel_bridge_add(dev, panel,
+ DRM_MODE_CONNECTOR_DSI);
+ if (IS_ERR(dsi->bridge))
+ return PTR_ERR(dsi->bridge);
+ }
+
+ /* The esc clock rate is supposed to always be 100 MHz. */
+ ret = clk_set_rate(dsi->escape_clock, 100 * 1000000);
+ if (ret) {
+ dev_err(dev, "Failed to set esc clock: %d\n", ret);
+ return ret;
+ }
+
+ ret = vc4_dsi_init_phy_clocks(dsi);
+ if (ret)
+ return ret;
+
+ if (dsi->port == 1)
+ vc4->dsi1 = dsi;
+
+ drm_encoder_init(drm, dsi->encoder, &vc4_dsi_encoder_funcs,
+ DRM_MODE_ENCODER_DSI, NULL);
+ drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs);
+
+ ret = drm_bridge_attach(dsi->encoder, dsi->bridge, NULL);
+ if (ret) {
+ dev_err(dev, "bridge attach failed: %d\n", ret);
+ return ret;
+ }
+ /* Disable the atomic helper calls into the bridge. We
+ * manually invoke the bridge pre_enable / enable / etc. hooks
+ * from our driver, since we need to sequence them within the
+ * encoder's enable/disable paths.
+ */
+ dsi->encoder->bridge = NULL;
+
+ pm_runtime_enable(dev);
+
+ return 0;
+}
+
+static void vc4_dsi_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_dsi *dsi = dev_get_drvdata(dev);
+
+ if (dsi->bridge)
+ pm_runtime_disable(dev);
+
+ vc4_dsi_encoder_destroy(dsi->encoder);
+
+ if (dsi->port == 1)
+ vc4->dsi1 = NULL;
+}
+
+static const struct component_ops vc4_dsi_ops = {
+ .bind = vc4_dsi_bind,
+ .unbind = vc4_dsi_unbind,
+};
+
+static int vc4_dsi_dev_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct vc4_dsi *dsi;
+ int ret;
+
+ dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
+ if (!dsi)
+ return -ENOMEM;
+ dev_set_drvdata(dev, dsi);
+
+ dsi->pdev = pdev;
+
+ /* Note, the initialization sequence for DSI and panels is
+ * tricky. The component bind above won't get past its
+ * -EPROBE_DEFER until the panel/bridge probes. The
+ * panel/bridge will return -EPROBE_DEFER until it has a
+ * mipi_dsi_host to register its device to. So, we register
+ * the host during pdev probe time, so vc4 as a whole can then
+ * -EPROBE_DEFER its component bind process until the panel
+ * successfully attaches.
+ */
+ dsi->dsi_host.ops = &vc4_dsi_host_ops;
+ dsi->dsi_host.dev = dev;
+ mipi_dsi_host_register(&dsi->dsi_host);
+
+ ret = component_add(&pdev->dev, &vc4_dsi_ops);
+ if (ret) {
+ mipi_dsi_host_unregister(&dsi->dsi_host);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int vc4_dsi_dev_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct vc4_dsi *dsi = dev_get_drvdata(dev);
+
+ component_del(&pdev->dev, &vc4_dsi_ops);
+ mipi_dsi_host_unregister(&dsi->dsi_host);
+
+ return 0;
+}
+
+struct platform_driver vc4_dsi_driver = {
+ .probe = vc4_dsi_dev_probe,
+ .remove = vc4_dsi_dev_remove,
+ .driver = {
+ .name = "vc4_dsi",
+ .of_match_table = vc4_dsi_dt_match,
+ },
+};
diff --git a/drivers/gpu/drm/vc4/vc4_fence.c b/drivers/gpu/drm/vc4/vc4_fence.c
new file mode 100644
index 000000000..580214e21
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_fence.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "vc4_drv.h"
+
+static const char *vc4_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "vc4";
+}
+
+static const char *vc4_fence_get_timeline_name(struct dma_fence *fence)
+{
+ return "vc4-v3d";
+}
+
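+/* A vc4 fence counts as signaled once the V3D interrupt handler has
+ * advanced vc4->finished_seqno to or past the fence's seqno.
+ */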
+static bool vc4_fence_signaled(struct dma_fence *fence)
+{
+ struct vc4_fence *f = to_vc4_fence(fence);
+ struct vc4_dev *vc4 = to_vc4_dev(f->dev);
+
+ return vc4->finished_seqno >= f->seqno;
+}
+
+const struct dma_fence_ops vc4_fence_ops = {
+ .get_driver_name = vc4_fence_get_driver_name,
+ .get_timeline_name = vc4_fence_get_timeline_name,
+ .signaled = vc4_fence_signaled,
+};
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
new file mode 100644
index 000000000..7910b9ace
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -0,0 +1,1377 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/sched/signal.h>
+#include <linux/dma-fence-array.h>
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+#include "vc4_trace.h"
+
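+/* (Re)arms the hangcheck timer for ~100ms from now.  When it fires,
+ * vc4_hangcheck_elapsed() checks whether the control list executors
+ * have made progress and schedules a GPU reset if they haven't.
+ */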
+static void
+vc4_queue_hangcheck(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ mod_timer(&vc4->hangcheck.timer,
+ round_jiffies_up(jiffies + msecs_to_jiffies(100)));
+}
+
+struct vc4_hang_state {
+ struct drm_vc4_get_hang_state user_state;
+
+ u32 bo_count;
+ struct drm_gem_object **bo;
+};
+
+static void
+vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
+{
+ unsigned int i;
+
+ for (i = 0; i < state->user_state.bo_count; i++)
+ drm_gem_object_put_unlocked(state->bo[i]);
+
+ kfree(state);
+}
+
+int
+vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_vc4_get_hang_state *get_state = data;
+ struct drm_vc4_get_hang_state_bo *bo_state;
+ struct vc4_hang_state *kernel_state;
+ struct drm_vc4_get_hang_state *state;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ unsigned long irqflags;
+ u32 i;
+ int ret = 0;
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ kernel_state = vc4->hang_state;
+ if (!kernel_state) {
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ return -ENOENT;
+ }
+ state = &kernel_state->user_state;
+
+ /* If the user's array isn't big enough, just return the
+ * required array size.
+ */
+ if (get_state->bo_count < state->bo_count) {
+ get_state->bo_count = state->bo_count;
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ return 0;
+ }
+
+ vc4->hang_state = NULL;
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+ /* Save the user's BO pointer, so we don't stomp it with the memcpy. */
+ state->bo = get_state->bo;
+ memcpy(get_state, state, sizeof(*state));
+
+ bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
+ if (!bo_state) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ for (i = 0; i < state->bo_count; i++) {
+ struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
+ u32 handle;
+
+ ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
+ &handle);
+
+ if (ret) {
+ state->bo_count = i;
+ goto err_delete_handle;
+ }
+ bo_state[i].handle = handle;
+ bo_state[i].paddr = vc4_bo->base.paddr;
+ bo_state[i].size = vc4_bo->base.base.size;
+ }
+
+ if (copy_to_user(u64_to_user_ptr(get_state->bo),
+ bo_state,
+ state->bo_count * sizeof(*bo_state)))
+ ret = -EFAULT;
+
+err_delete_handle:
+ if (ret) {
+ for (i = 0; i < state->bo_count; i++)
+ drm_gem_handle_delete(file_priv, bo_state[i].handle);
+ }
+
+err_free:
+ vc4_free_hang_state(dev, kernel_state);
+ kfree(bo_state);
+
+ return ret;
+}
+
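+/* Called from the GPU reset worker to snapshot the hung jobs' BOs and
+ * the V3D register state, so that userspace can later fetch it through
+ * the GET_HANG_STATE ioctl for debugging.
+ */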
+static void
+vc4_save_hang_state(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_vc4_get_hang_state *state;
+ struct vc4_hang_state *kernel_state;
+ struct vc4_exec_info *exec[2];
+ struct vc4_bo *bo;
+ unsigned long irqflags;
+ unsigned int i, j, k, unref_list_count;
+
+ kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
+ if (!kernel_state)
+ return;
+
+ state = &kernel_state->user_state;
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ exec[0] = vc4_first_bin_job(vc4);
+ exec[1] = vc4_first_render_job(vc4);
+ if (!exec[0] && !exec[1]) {
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ return;
+ }
+
+ /* Get the bos from both binner and renderer into hang state. */
+ state->bo_count = 0;
+ for (i = 0; i < 2; i++) {
+ if (!exec[i])
+ continue;
+
+ unref_list_count = 0;
+ list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
+ unref_list_count++;
+ state->bo_count += exec[i]->bo_count + unref_list_count;
+ }
+
+ kernel_state->bo = kcalloc(state->bo_count,
+ sizeof(*kernel_state->bo), GFP_ATOMIC);
+
+ if (!kernel_state->bo) {
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ return;
+ }
+
+ k = 0;
+ for (i = 0; i < 2; i++) {
+ if (!exec[i])
+ continue;
+
+ for (j = 0; j < exec[i]->bo_count; j++) {
+ bo = to_vc4_bo(&exec[i]->bo[j]->base);
+
+ /* Retain BOs just in case they were marked purgeable.
+ * This prevents the BO from being purged before
+ * someone had a chance to dump the hang state.
+ */
+ WARN_ON(!refcount_read(&bo->usecnt));
+ refcount_inc(&bo->usecnt);
+ drm_gem_object_get(&exec[i]->bo[j]->base);
+ kernel_state->bo[k++] = &exec[i]->bo[j]->base;
+ }
+
+ list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
+ /* No need to retain BOs coming from the ->unref_list
+ * because they are naturally unpurgeable.
+ */
+ drm_gem_object_get(&bo->base.base);
+ kernel_state->bo[k++] = &bo->base.base;
+ }
+ }
+
+ WARN_ON_ONCE(k != state->bo_count);
+
+ if (exec[0])
+ state->start_bin = exec[0]->ct0ca;
+ if (exec[1])
+ state->start_render = exec[1]->ct1ca;
+
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+ state->ct0ca = V3D_READ(V3D_CTNCA(0));
+ state->ct0ea = V3D_READ(V3D_CTNEA(0));
+
+ state->ct1ca = V3D_READ(V3D_CTNCA(1));
+ state->ct1ea = V3D_READ(V3D_CTNEA(1));
+
+ state->ct0cs = V3D_READ(V3D_CTNCS(0));
+ state->ct1cs = V3D_READ(V3D_CTNCS(1));
+
+ state->ct0ra0 = V3D_READ(V3D_CT00RA0);
+ state->ct1ra0 = V3D_READ(V3D_CT01RA0);
+
+ state->bpca = V3D_READ(V3D_BPCA);
+ state->bpcs = V3D_READ(V3D_BPCS);
+ state->bpoa = V3D_READ(V3D_BPOA);
+ state->bpos = V3D_READ(V3D_BPOS);
+
+ state->vpmbase = V3D_READ(V3D_VPMBASE);
+
+ state->dbge = V3D_READ(V3D_DBGE);
+ state->fdbgo = V3D_READ(V3D_FDBGO);
+ state->fdbgb = V3D_READ(V3D_FDBGB);
+ state->fdbgr = V3D_READ(V3D_FDBGR);
+ state->fdbgs = V3D_READ(V3D_FDBGS);
+ state->errstat = V3D_READ(V3D_ERRSTAT);
+
+ /* We need to turn purgeable BOs into unpurgeable ones so that
+ * userspace has a chance to dump the hang state before the kernel
+ * decides to purge those BOs.
+ * Note that BO consistency at dump time cannot be guaranteed. For
+ * example, if the owner of these BOs decides to re-use them or mark
+ * them purgeable again there's nothing we can do to prevent it.
+ */
+ for (i = 0; i < kernel_state->user_state.bo_count; i++) {
+ struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);
+
+ if (bo->madv == __VC4_MADV_NOTSUPP)
+ continue;
+
+ mutex_lock(&bo->madv_lock);
+ if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
+ bo->madv = VC4_MADV_WILLNEED;
+ refcount_dec(&bo->usecnt);
+ mutex_unlock(&bo->madv_lock);
+ }
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ if (vc4->hang_state) {
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ vc4_free_hang_state(dev, kernel_state);
+ } else {
+ vc4->hang_state = kernel_state;
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ }
+}
+
+static void
+vc4_reset(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ DRM_INFO("Resetting GPU.\n");
+
+ mutex_lock(&vc4->power_lock);
+ if (vc4->power_refcount) {
+ /* Power the device off and back on by dropping and re-taking
+ * the runtime PM reference.
+ */
+ pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
+ pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+ }
+ mutex_unlock(&vc4->power_lock);
+
+ vc4_irq_reset(dev);
+
+ /* Rearm the hangcheck -- another job might have been waiting
+ * for our hung one to get kicked off, and vc4_irq_reset()
+ * would have started it.
+ */
+ vc4_queue_hangcheck(dev);
+}
+
+static void
+vc4_reset_work(struct work_struct *work)
+{
+ struct vc4_dev *vc4 =
+ container_of(work, struct vc4_dev, hangcheck.reset_work);
+
+ vc4_save_hang_state(vc4->dev);
+
+ vc4_reset(vc4->dev);
+}
+
+static void
+vc4_hangcheck_elapsed(struct timer_list *t)
+{
+ struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer);
+ struct drm_device *dev = vc4->dev;
+ uint32_t ct0ca, ct1ca;
+ unsigned long irqflags;
+ struct vc4_exec_info *bin_exec, *render_exec;
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+
+ bin_exec = vc4_first_bin_job(vc4);
+ render_exec = vc4_first_render_job(vc4);
+
+ /* If idle, we can stop watching for hangs. */
+ if (!bin_exec && !render_exec) {
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ return;
+ }
+
+ ct0ca = V3D_READ(V3D_CTNCA(0));
+ ct1ca = V3D_READ(V3D_CTNCA(1));
+
+ /* If we've made any progress in execution, rearm the timer
+ * and wait.
+ */
+ if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
+ (render_exec && ct1ca != render_exec->last_ct1ca)) {
+ if (bin_exec)
+ bin_exec->last_ct0ca = ct0ca;
+ if (render_exec)
+ render_exec->last_ct1ca = ct1ca;
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ vc4_queue_hangcheck(dev);
+ return;
+ }
+
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+ /* We've gone too long with no progress, reset. This has to
+ * be done from a work struct, since resetting can sleep and
+ * this timer hook isn't allowed to.
+ */
+ schedule_work(&vc4->hangcheck.reset_work);
+}
+
+static void
+submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ /* Set the current and end address of the control list.
+ * Writing the end register is what starts the job.
+ */
+ V3D_WRITE(V3D_CTNCA(thread), start);
+ V3D_WRITE(V3D_CTNEA(thread), end);
+}
+
+int
+vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
+ bool interruptible)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int ret = 0;
+ unsigned long timeout_expire;
+ DEFINE_WAIT(wait);
+
+ if (vc4->finished_seqno >= seqno)
+ return 0;
+
+ if (timeout_ns == 0)
+ return -ETIME;
+
+ timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
+
+ trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
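+ /* Sleep on the job wait queue until the interrupt handler
+ * advances finished_seqno past our target, the timeout expires,
+ * or (when waiting interruptibly) a signal arrives.
+ */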
+ for (;;) {
+ prepare_to_wait(&vc4->job_wait_queue, &wait,
+ interruptible ? TASK_INTERRUPTIBLE :
+ TASK_UNINTERRUPTIBLE);
+
+ if (interruptible && signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+
+ if (vc4->finished_seqno >= seqno)
+ break;
+
+ if (timeout_ns != ~0ull) {
+ if (time_after_eq(jiffies, timeout_expire)) {
+ ret = -ETIME;
+ break;
+ }
+ schedule_timeout(timeout_expire - jiffies);
+ } else {
+ schedule();
+ }
+ }
+
+ finish_wait(&vc4->job_wait_queue, &wait);
+ trace_vc4_wait_for_seqno_end(dev, seqno);
+
+ return ret;
+}
+
+static void
+vc4_flush_caches(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ /* Flush the GPU L2 caches. These caches sit on top of system
+ * L3 (the 128kb or so shared with the CPU), and are
+ * non-allocating in the L3.
+ */
+ V3D_WRITE(V3D_L2CACTL,
+ V3D_L2CACTL_L2CCLR);
+
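+ /* Also clear the per-slice caches: both texture caches (T0/T1),
+ * the uniforms cache and the instruction cache.
+ */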
+ V3D_WRITE(V3D_SLCACTL,
+ VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
+ VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
+ VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
+ VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
+}
+
+static void
+vc4_flush_texture_caches(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ V3D_WRITE(V3D_L2CACTL,
+ V3D_L2CACTL_L2CCLR);
+
+ V3D_WRITE(V3D_SLCACTL,
+ VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
+ VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC));
+}
+
+/* Sets the registers for the next job to actually be executed in
+ * the hardware.
+ *
+ * The job_lock should be held during this.
+ */
+void
+vc4_submit_next_bin_job(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_exec_info *exec;
+
+again:
+ exec = vc4_first_bin_job(vc4);
+ if (!exec)
+ return;
+
+ vc4_flush_caches(dev);
+
+ /* Only start the perfmon if it was not already started by a previous
+ * job.
+ */
+ if (exec->perfmon && vc4->active_perfmon != exec->perfmon)
+ vc4_perfmon_start(vc4, exec->perfmon);
+
+ /* Either put the job in the binner if it uses the binner, or
+ * immediately move it to the to-be-rendered queue.
+ */
+ if (exec->ct0ca != exec->ct0ea) {
+ submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
+ } else {
+ struct vc4_exec_info *next;
+
+ vc4_move_job_to_render(dev, exec);
+ next = vc4_first_bin_job(vc4);
+
+ /* We can't start the next bin job if the previous job had a
+ * different perfmon instance attached to it. The same goes
+ * if one of them had a perfmon attached to it and the other
+ * one doesn't.
+ */
+ if (next && next->perfmon == exec->perfmon)
+ goto again;
+ }
+}
+
+void
+vc4_submit_next_render_job(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_exec_info *exec = vc4_first_render_job(vc4);
+
+ if (!exec)
+ return;
+
+ /* A previous RCL may have written to one of our textures, and
+ * our full cache flush at bin time may have occurred before
+ * that RCL completed. Flush the texture cache now, but not
+ * the instructions or uniforms (since we don't write those
+ * from an RCL).
+ */
+ vc4_flush_texture_caches(dev);
+
+ submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
+}
+
+void
+vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ bool was_empty = list_empty(&vc4->render_job_list);
+
+ list_move_tail(&exec->head, &vc4->render_job_list);
+ if (was_empty)
+ vc4_submit_next_render_job(dev);
+}
+
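+/* Records the job's seqno on every BO it references and attaches the
+ * job's fence to their reservation objects: BOs the job only reads get
+ * a shared fence, while BOs written by the RCL get the exclusive fence.
+ */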
+static void
+vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
+{
+ struct vc4_bo *bo;
+ unsigned i;
+
+ for (i = 0; i < exec->bo_count; i++) {
+ bo = to_vc4_bo(&exec->bo[i]->base);
+ bo->seqno = seqno;
+
+ reservation_object_add_shared_fence(bo->resv, exec->fence);
+ }
+
+ list_for_each_entry(bo, &exec->unref_list, unref_head) {
+ bo->seqno = seqno;
+ }
+
+ for (i = 0; i < exec->rcl_write_bo_count; i++) {
+ bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
+ bo->write_seqno = seqno;
+
+ reservation_object_add_excl_fence(bo->resv, exec->fence);
+ }
+}
+
+static void
+vc4_unlock_bo_reservations(struct drm_device *dev,
+ struct vc4_exec_info *exec,
+ struct ww_acquire_ctx *acquire_ctx)
+{
+ int i;
+
+ for (i = 0; i < exec->bo_count; i++) {
+ struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list). They're entirely private
+ * to vc4, so we don't attach dma-buf fences to them.
+ */
+static int
+vc4_lock_bo_reservations(struct drm_device *dev,
+ struct vc4_exec_info *exec,
+ struct ww_acquire_ctx *acquire_ctx)
+{
+ int contended_lock = -1;
+ int i, ret;
+ struct vc4_bo *bo;
+
+ ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
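+ /* Standard ww_mutex acquisition loop: if locking any BO returns
+ * -EDEADLK, drop every lock we hold, take the contended lock with
+ * the interruptible slow path, and retry the whole set.
+ */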
+retry:
+ if (contended_lock != -1) {
+ bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+ ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+ acquire_ctx);
+ if (ret) {
+ ww_acquire_done(acquire_ctx);
+ return ret;
+ }
+ }
+
+ for (i = 0; i < exec->bo_count; i++) {
+ if (i == contended_lock)
+ continue;
+
+ bo = to_vc4_bo(&exec->bo[i]->base);
+
+ ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+ if (ret) {
+ int j;
+
+ for (j = 0; j < i; j++) {
+ bo = to_vc4_bo(&exec->bo[j]->base);
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ if (contended_lock != -1 && contended_lock >= i) {
+ bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ if (ret == -EDEADLK) {
+ contended_lock = i;
+ goto retry;
+ }
+
+ ww_acquire_done(acquire_ctx);
+ return ret;
+ }
+ }
+
+ ww_acquire_done(acquire_ctx);
+
+ /* Reserve space for our shared (read-only) fence references,
+ * before we commit the CL to the hardware.
+ */
+ for (i = 0; i < exec->bo_count; i++) {
+ bo = to_vc4_bo(&exec->bo[i]->base);
+
+ ret = reservation_object_reserve_shared(bo->resv);
+ if (ret) {
+ vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/* Queues a struct vc4_exec_info for execution. If no job is
+ * currently executing, then submits it.
+ *
+ * Unlike most GPUs, our hardware only handles one command list at a
+ * time. To queue multiple jobs at once, we'd need to edit the
+ * previous command list to have a jump to the new one at the end, and
+ * then bump the end address. That's a change for a later date,
+ * though.
+ */
+static int
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
+ struct ww_acquire_ctx *acquire_ctx,
+ struct drm_syncobj *out_sync)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_exec_info *renderjob;
+ uint64_t seqno;
+ unsigned long irqflags;
+ struct vc4_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return -ENOMEM;
+ fence->dev = dev;
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+
+ seqno = ++vc4->emit_seqno;
+ exec->seqno = seqno;
+
+ dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
+ vc4->dma_fence_context, exec->seqno);
+ fence->seqno = exec->seqno;
+ exec->fence = &fence->base;
+
+ if (out_sync)
+ drm_syncobj_replace_fence(out_sync, exec->fence);
+
+ vc4_update_bo_seqnos(exec, seqno);
+
+ vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+
+ list_add_tail(&exec->head, &vc4->bin_job_list);
+
+ /* If no bin job was executing and if the render job (if any) has the
+ * same perfmon as our job attached to it (or if both jobs don't have
+ * perfmon activated), then kick ours off. Otherwise, it'll get
+ * started when the previous job's flush/render done interrupt occurs.
+ */
+ renderjob = vc4_first_render_job(vc4);
+ if (vc4_first_bin_job(vc4) == exec &&
+ (!renderjob || renderjob->perfmon == exec->perfmon)) {
+ vc4_submit_next_bin_job(dev);
+ vc4_queue_hangcheck(dev);
+ }
+
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+ return 0;
+}
+
+/**
+ * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @exec: V3D job being set up
+ *
+ * The command validator needs to reference BOs by their index within
+ * the submitted job's BO list. This does the validation of the job's
+ * BO list and reference counting for the lifetime of the job.
+ */
+static int
+vc4_cl_lookup_bos(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct vc4_exec_info *exec)
+{
+ struct drm_vc4_submit_cl *args = exec->args;
+ uint32_t *handles;
+ int ret = 0;
+ int i;
+
+ exec->bo_count = args->bo_handle_count;
+
+ if (!exec->bo_count) {
+ /* See comment on bo_index for why we have to check
+ * this.
+ */
+ DRM_DEBUG("Rendering requires BOs to validate\n");
+ return -EINVAL;
+ }
+
+ exec->bo = kvmalloc_array(exec->bo_count,
+ sizeof(struct drm_gem_cma_object *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!exec->bo) {
+ DRM_ERROR("Failed to allocate validated BO pointers\n");
+ return -ENOMEM;
+ }
+
+ handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!handles) {
+ ret = -ENOMEM;
+ DRM_ERROR("Failed to allocate incoming GEM handles\n");
+ goto fail;
+ }
+
+ if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
+ exec->bo_count * sizeof(uint32_t))) {
+ ret = -EFAULT;
+ DRM_ERROR("Failed to copy in GEM handles\n");
+ goto fail;
+ }
+
+ spin_lock(&file_priv->table_lock);
+ for (i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+ handles[i]);
+ if (!bo) {
+ DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
+ i, handles[i]);
+ ret = -EINVAL;
+ break;
+ }
+
+ drm_gem_object_get(bo);
+ exec->bo[i] = (struct drm_gem_cma_object *)bo;
+ }
+ spin_unlock(&file_priv->table_lock);
+
+ if (ret)
+ goto fail_put_bo;
+
+ for (i = 0; i < exec->bo_count; i++) {
+ ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
+ if (ret)
+ goto fail_dec_usecnt;
+ }
+
+ kvfree(handles);
+ return 0;
+
+fail_dec_usecnt:
+ /* Decrease usecnt on acquired objects.
+ * We cannot rely on vc4_complete_exec() to release resources here,
+ * because vc4_complete_exec() has no information about which BO has
+ * had its ->usecnt incremented.
+ * To make things easier we just free everything explicitly and set
+ * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
+ * step.
+ */
+ for (i-- ; i >= 0; i--)
+ vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));
+
+fail_put_bo:
+ /* Release any reference to acquired objects. */
+ for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
+ drm_gem_object_put_unlocked(&exec->bo[i]->base);
+
+fail:
+ kvfree(handles);
+ kvfree(exec->bo);
+ exec->bo = NULL;
+ return ret;
+}
+
+static int
+vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+ struct drm_vc4_submit_cl *args = exec->args;
+ void *temp = NULL;
+ void *bin;
+ int ret = 0;
+ uint32_t bin_offset = 0;
+ uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
+ 16);
+ uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
+ uint32_t exec_size = uniforms_offset + args->uniforms_size;
+ uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
+ args->shader_rec_count);
+ struct vc4_bo *bo;
+
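+ /* Each comparison below catches a wrap-around in the unsigned
+ * offset/size arithmetic above, so oversized user-supplied sizes
+ * can't shrink the allocation that the copy_from_user() calls
+ * below write into.
+ */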
+ if (shader_rec_offset < args->bin_cl_size ||
+ uniforms_offset < shader_rec_offset ||
+ exec_size < uniforms_offset ||
+ args->shader_rec_count >= (UINT_MAX /
+ sizeof(struct vc4_shader_state)) ||
+ temp_size < exec_size) {
+ DRM_DEBUG("overflow in exec arguments\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* Allocate space where we'll store the copied in user command lists
+ * and shader records.
+ *
+ * We don't just copy directly into the BOs because we need to
+ * read the contents back for validation, and I think accesses
+ * through bo->vaddr are uncached.
+ */
+ temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
+ if (!temp) {
+ DRM_ERROR("Failed to allocate storage for copying "
+ "in bin/render CLs.\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+ bin = temp + bin_offset;
+ exec->shader_rec_u = temp + shader_rec_offset;
+ exec->uniforms_u = temp + uniforms_offset;
+ exec->shader_state = temp + exec_size;
+ exec->shader_state_size = args->shader_rec_count;
+
+ if (copy_from_user(bin,
+ u64_to_user_ptr(args->bin_cl),
+ args->bin_cl_size)) {
+ ret = -EFAULT;
+ goto fail;
+ }
+
+ if (copy_from_user(exec->shader_rec_u,
+ u64_to_user_ptr(args->shader_rec),
+ args->shader_rec_size)) {
+ ret = -EFAULT;
+ goto fail;
+ }
+
+ if (copy_from_user(exec->uniforms_u,
+ u64_to_user_ptr(args->uniforms),
+ args->uniforms_size)) {
+ ret = -EFAULT;
+ goto fail;
+ }
+
+ bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
+ if (IS_ERR(bo)) {
+ DRM_ERROR("Couldn't allocate BO for binning\n");
+ ret = PTR_ERR(bo);
+ goto fail;
+ }
+ exec->exec_bo = &bo->base;
+
+ list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
+ &exec->unref_list);
+
+ exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+
+ exec->bin_u = bin;
+
+ exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
+ exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
+ exec->shader_rec_size = args->shader_rec_size;
+
+ exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
+ exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
+ exec->uniforms_size = args->uniforms_size;
+
+ ret = vc4_validate_bin_cl(dev,
+ exec->exec_bo->vaddr + bin_offset,
+ bin,
+ exec);
+ if (ret)
+ goto fail;
+
+ ret = vc4_validate_shader_recs(dev, exec);
+ if (ret)
+ goto fail;
+
+ /* Block waiting on any previous rendering into the CS's VBO,
+ * IB, or textures, so that pixels are actually written by the
+ * time we try to read them.
+ */
+ ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
+
+fail:
+ kvfree(temp);
+ return ret;
+}
+
+static void
+vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ unsigned long irqflags;
+ unsigned i;
+
+ /* If we got force-completed because of GPU reset rather than
+ * through our IRQ handler, signal the fence now.
+ */
+ if (exec->fence) {
+ dma_fence_signal(exec->fence);
+ dma_fence_put(exec->fence);
+ }
+
+ if (exec->bo) {
+ for (i = 0; i < exec->bo_count; i++) {
+ struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+ vc4_bo_dec_usecnt(bo);
+ drm_gem_object_put_unlocked(&exec->bo[i]->base);
+ }
+ kvfree(exec->bo);
+ }
+
+ while (!list_empty(&exec->unref_list)) {
+ struct vc4_bo *bo = list_first_entry(&exec->unref_list,
+ struct vc4_bo, unref_head);
+ list_del(&bo->unref_head);
+ drm_gem_object_put_unlocked(&bo->base.base);
+ }
+
+ /* Free up the allocation of any bin slots we used. */
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ vc4->bin_alloc_used &= ~exec->bin_slots;
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+ /* Release the reference we had on the perf monitor. */
+ vc4_perfmon_put(exec->perfmon);
+
+ mutex_lock(&vc4->power_lock);
+ if (--vc4->power_refcount == 0) {
+ pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
+ pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
+ }
+ mutex_unlock(&vc4->power_lock);
+
+ kfree(exec);
+}
+
+void
+vc4_job_handle_completed(struct vc4_dev *vc4)
+{
+ unsigned long irqflags;
+ struct vc4_seqno_cb *cb, *cb_temp;
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ while (!list_empty(&vc4->job_done_list)) {
+ struct vc4_exec_info *exec =
+ list_first_entry(&vc4->job_done_list,
+ struct vc4_exec_info, head);
+ list_del(&exec->head);
+
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ vc4_complete_exec(vc4->dev, exec);
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ }
+
+ list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
+ if (cb->seqno <= vc4->finished_seqno) {
+ list_del_init(&cb->work.entry);
+ schedule_work(&cb->work);
+ }
+ }
+
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+static void vc4_seqno_cb_work(struct work_struct *work)
+{
+ struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
+
+ cb->func(cb);
+}
+
+int vc4_queue_seqno_cb(struct drm_device *dev,
+ struct vc4_seqno_cb *cb, uint64_t seqno,
+ void (*func)(struct vc4_seqno_cb *cb))
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int ret = 0;
+ unsigned long irqflags;
+
+ cb->func = func;
+ INIT_WORK(&cb->work, vc4_seqno_cb_work);
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ if (seqno > vc4->finished_seqno) {
+ cb->seqno = seqno;
+ list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
+ } else {
+ schedule_work(&cb->work);
+ }
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+ return ret;
+}
+
+/* Scheduled when any job has been completed, this walks the list of
+ * jobs that had completed and unrefs their BOs and frees their exec
+ * structs.
+ */
+static void
+vc4_job_done_work(struct work_struct *work)
+{
+ struct vc4_dev *vc4 =
+ container_of(work, struct vc4_dev, job_done_work);
+
+ vc4_job_handle_completed(vc4);
+}
+
+static int
+vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
+ uint64_t seqno,
+ uint64_t *timeout_ns)
+{
+ unsigned long start = jiffies;
+ int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
+
+ if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
+ uint64_t delta = jiffies_to_nsecs(jiffies - start);
+
+ if (*timeout_ns >= delta)
+ *timeout_ns -= delta;
+ }
+
+ return ret;
+}
+
+int
+vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_vc4_wait_seqno *args = data;
+
+ return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
+ &args->timeout_ns);
+}
+
+int
+vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ int ret;
+ struct drm_vc4_wait_bo *args = data;
+ struct drm_gem_object *gem_obj;
+ struct vc4_bo *bo;
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -EINVAL;
+ }
+ bo = to_vc4_bo(gem_obj);
+
+ ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
+ &args->timeout_ns);
+
+ drm_gem_object_put_unlocked(gem_obj);
+ return ret;
+}
+
+/**
+ * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * This is the main entrypoint for userspace to submit a 3D frame to
+ * the GPU. Userspace provides the binner command list (if
+ * applicable), and the kernel sets up the render command list to draw
+ * to the framebuffer described in the ioctl, using the command lists
+ * that the 3D engine's binner will produce.
+ */
+int
+vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_file *vc4file = file_priv->driver_priv;
+ struct drm_vc4_submit_cl *args = data;
+ struct drm_syncobj *out_sync = NULL;
+ struct vc4_exec_info *exec;
+ struct ww_acquire_ctx acquire_ctx;
+ struct dma_fence *in_fence;
+ int ret = 0;
+
+ if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
+ VC4_SUBMIT_CL_FIXED_RCL_ORDER |
+ VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
+ VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
+ DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
+ return -EINVAL;
+ }
+
+ if (args->pad2 != 0) {
+ DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2);
+ return -EINVAL;
+ }
+
+ exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
+ if (!exec) {
+ DRM_ERROR("malloc failure on exec struct\n");
+ return -ENOMEM;
+ }
+
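+ /* The first outstanding job takes a runtime PM reference on V3D;
+ * it is dropped again in vc4_complete_exec() when the last job
+ * finishes.
+ */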
+ mutex_lock(&vc4->power_lock);
+ if (vc4->power_refcount++ == 0) {
+ ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+ if (ret < 0) {
+ mutex_unlock(&vc4->power_lock);
+ vc4->power_refcount--;
+ kfree(exec);
+ return ret;
+ }
+ }
+ mutex_unlock(&vc4->power_lock);
+
+ exec->args = args;
+ INIT_LIST_HEAD(&exec->unref_list);
+
+ ret = vc4_cl_lookup_bos(dev, file_priv, exec);
+ if (ret)
+ goto fail;
+
+ if (args->perfmonid) {
+ exec->perfmon = vc4_perfmon_find(vc4file,
+ args->perfmonid);
+ if (!exec->perfmon) {
+ ret = -ENOENT;
+ goto fail;
+ }
+ }
+
+ if (args->in_sync) {
+ ret = drm_syncobj_find_fence(file_priv, args->in_sync,
+ &in_fence);
+ if (ret)
+ goto fail;
+
+ /* When the fence (or fence array) is exclusively from our
+ * context we can skip the wait since jobs are executed in
+ * order of their submission through this ioctl and this can
+ * only have fences from a prior job.
+ */
+ if (!dma_fence_match_context(in_fence,
+ vc4->dma_fence_context)) {
+ ret = dma_fence_wait(in_fence, true);
+ if (ret) {
+ dma_fence_put(in_fence);
+ goto fail;
+ }
+ }
+
+ dma_fence_put(in_fence);
+ }
+
+ if (exec->args->bin_cl_size != 0) {
+ ret = vc4_get_bcl(dev, exec);
+ if (ret)
+ goto fail;
+ } else {
+ exec->ct0ca = 0;
+ exec->ct0ea = 0;
+ }
+
+ ret = vc4_get_rcl(dev, exec);
+ if (ret)
+ goto fail;
+
+ ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
+ if (ret)
+ goto fail;
+
+ if (args->out_sync) {
+ out_sync = drm_syncobj_find(file_priv, args->out_sync);
+ if (!out_sync) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* We replace the fence in out_sync in vc4_queue_submit since
+ * the render job could execute immediately after that call.
+ * If it finishes before our ioctl processing resumes, the
+ * render job fence could already have been freed.
+ */
+ }
+
+ /* Clear this out of the struct we'll be putting in the queue,
+ * since it's part of our stack.
+ */
+ exec->args = NULL;
+
+ ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
+
+ /* The syncobj isn't part of the exec data and we need to free our
+ * reference even if job submission failed.
+ */
+ if (out_sync)
+ drm_syncobj_put(out_sync);
+
+ if (ret)
+ goto fail;
+
+ /* Return the seqno for our job. */
+ args->seqno = vc4->emit_seqno;
+
+ return 0;
+
+fail:
+ vc4_complete_exec(vc4->dev, exec);
+
+ return ret;
+}
+
+void
+vc4_gem_init(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ vc4->dma_fence_context = dma_fence_context_alloc(1);
+
+ INIT_LIST_HEAD(&vc4->bin_job_list);
+ INIT_LIST_HEAD(&vc4->render_job_list);
+ INIT_LIST_HEAD(&vc4->job_done_list);
+ INIT_LIST_HEAD(&vc4->seqno_cb_list);
+ spin_lock_init(&vc4->job_lock);
+
+ INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
+ timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0);
+
+ INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+
+ mutex_init(&vc4->power_lock);
+
+ INIT_LIST_HEAD(&vc4->purgeable.list);
+ mutex_init(&vc4->purgeable.lock);
+}
+
+void
+vc4_gem_destroy(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ /* Waiting for exec to finish would need to be done before
+ * unregistering V3D.
+ */
+ WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
+
+ /* V3D should already have disabled its interrupt and cleared
+ * the overflow allocation registers. Now free the object.
+ */
+ if (vc4->bin_bo) {
+ drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
+ vc4->bin_bo = NULL;
+ }
+
+ if (vc4->hang_state)
+ vc4_free_hang_state(dev, vc4->hang_state);
+}
+
+int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_vc4_gem_madvise *args = data;
+ struct drm_gem_object *gem_obj;
+ struct vc4_bo *bo;
+ int ret;
+
+ switch (args->madv) {
+ case VC4_MADV_DONTNEED:
+ case VC4_MADV_WILLNEED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -ENOENT;
+ }
+
+ bo = to_vc4_bo(gem_obj);
+
+ /* Only BOs exposed to userspace can be purged. */
+ if (bo->madv == __VC4_MADV_NOTSUPP) {
+ DRM_DEBUG("madvise not supported on this BO\n");
+ ret = -EINVAL;
+ goto out_put_gem;
+ }
+
+ /* Not sure it's safe to purge imported BOs. Let's just assume it's
+ * not until proven otherwise.
+ */
+ if (gem_obj->import_attach) {
+ DRM_DEBUG("madvise not supported on imported BOs\n");
+ ret = -EINVAL;
+ goto out_put_gem;
+ }
+
+ mutex_lock(&bo->madv_lock);
+
+ if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
+ !refcount_read(&bo->usecnt)) {
+ /* If the BO is about to be marked as purgeable, is not used
+ * and is not already purgeable or purged, add it to the
+ * purgeable list.
+ */
+ vc4_bo_add_to_purgeable_pool(bo);
+ } else if (args->madv == VC4_MADV_WILLNEED &&
+ bo->madv == VC4_MADV_DONTNEED &&
+ !refcount_read(&bo->usecnt)) {
+ /* The BO has not been purged yet, just remove it from
+ * the purgeable list.
+ */
+ vc4_bo_remove_from_purgeable_pool(bo);
+ }
+
+ /* Save the purged state. */
+ args->retained = bo->madv != __VC4_MADV_PURGED;
+
+ /* Update internal madv state only if the bo was not purged. */
+ if (bo->madv != __VC4_MADV_PURGED)
+ bo->madv = args->madv;
+
+ mutex_unlock(&bo->madv_lock);
+
+ ret = 0;
+
+out_put_gem:
+ drm_gem_object_put_unlocked(gem_obj);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
new file mode 100644
index 000000000..116166266
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -0,0 +1,1520 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ * Copyright (c) 2014 The Linux Foundation. All rights reserved.
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * DOC: VC4 Falcon HDMI module
+ *
+ * The HDMI core has a state machine and a PHY. On BCM2835, most of
+ * the unit operates off of the HSM clock from CPRMAN. It also
+ * internally uses the PLLH_PIX clock for the PHY.
+ *
+ * HDMI infoframes are kept within a small packet ram, where each
+ * packet can be individually enabled for including in a frame.
+ *
+ * HDMI audio is implemented entirely within the HDMI IP block. A
+ * register in the HDMI encoder takes SPDIF frames from the DMA engine
+ * and transfers them over an internal MAI (multi-channel audio
+ * interconnect) bus to the encoder side for insertion into the video
+ * blank regions.
+ *
+ * The driver's HDMI encoder does not yet support power management.
+ * The HDMI encoder's power domain and the HSM/pixel clocks are kept
+ * continuously running, and only the HDMI logic and packet ram are
+ * powered off/on at disable/enable time.
+ *
+ * The driver does not yet support CEC control, though the HDMI
+ * encoder block has CEC support.
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/i2c.h>
+#include <linux/of_address.h>
+#include <linux/of_gpio.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+#include <linux/rational.h>
+#include <sound/dmaengine_pcm.h>
+#include <sound/pcm_drm_eld.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include "media/cec.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define HSM_CLOCK_FREQ 163682864
+#define CEC_CLOCK_FREQ 40000
+#define CEC_CLOCK_DIV (HSM_CLOCK_FREQ / CEC_CLOCK_FREQ)
+
+/* HDMI audio information */
+struct vc4_hdmi_audio {
+ struct snd_soc_card card;
+ struct snd_soc_dai_link link;
+ int samplerate;
+ int channels;
+ struct snd_dmaengine_dai_dma_data dma_data;
+ struct snd_pcm_substream *substream;
+};
+
+/* General HDMI hardware state. */
+struct vc4_hdmi {
+ struct platform_device *pdev;
+
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+
+ struct vc4_hdmi_audio audio;
+
+ struct i2c_adapter *ddc;
+ void __iomem *hdmicore_regs;
+ void __iomem *hd_regs;
+ int hpd_gpio;
+ bool hpd_active_low;
+
+ struct cec_adapter *cec_adap;
+ struct cec_msg cec_rx_msg;
+ bool cec_tx_ok;
+ bool cec_irq_was_rx;
+
+ struct clk *pixel_clock;
+ struct clk *hsm_clock;
+};
+
+#define HDMI_READ(offset) readl(vc4->hdmi->hdmicore_regs + offset)
+#define HDMI_WRITE(offset, val) writel(val, vc4->hdmi->hdmicore_regs + offset)
+#define HD_READ(offset) readl(vc4->hdmi->hd_regs + offset)
+#define HD_WRITE(offset, val) writel(val, vc4->hdmi->hd_regs + offset)
+
+/* VC4 HDMI encoder KMS struct */
+struct vc4_hdmi_encoder {
+ struct vc4_encoder base;
+ bool hdmi_monitor;
+ bool limited_rgb_range;
+ bool rgb_range_selectable;
+};
+
+static inline struct vc4_hdmi_encoder *
+to_vc4_hdmi_encoder(struct drm_encoder *encoder)
+{
+ return container_of(encoder, struct vc4_hdmi_encoder, base.base);
+}
+
+/* VC4 HDMI connector KMS struct */
+struct vc4_hdmi_connector {
+ struct drm_connector base;
+
+ /* Since the connector is attached to just the one encoder,
+ * this is the reference to it so we can do the best_encoder()
+ * hook.
+ */
+ struct drm_encoder *encoder;
+};
+
+static inline struct vc4_hdmi_connector *
+to_vc4_hdmi_connector(struct drm_connector *connector)
+{
+ return container_of(connector, struct vc4_hdmi_connector, base);
+}
+
+#define HDMI_REG(reg) { reg, #reg }
+static const struct {
+ u32 reg;
+ const char *name;
+} hdmi_regs[] = {
+ HDMI_REG(VC4_HDMI_CORE_REV),
+ HDMI_REG(VC4_HDMI_SW_RESET_CONTROL),
+ HDMI_REG(VC4_HDMI_HOTPLUG_INT),
+ HDMI_REG(VC4_HDMI_HOTPLUG),
+ HDMI_REG(VC4_HDMI_MAI_CHANNEL_MAP),
+ HDMI_REG(VC4_HDMI_MAI_CONFIG),
+ HDMI_REG(VC4_HDMI_MAI_FORMAT),
+ HDMI_REG(VC4_HDMI_AUDIO_PACKET_CONFIG),
+ HDMI_REG(VC4_HDMI_RAM_PACKET_CONFIG),
+ HDMI_REG(VC4_HDMI_HORZA),
+ HDMI_REG(VC4_HDMI_HORZB),
+ HDMI_REG(VC4_HDMI_FIFO_CTL),
+ HDMI_REG(VC4_HDMI_SCHEDULER_CONTROL),
+ HDMI_REG(VC4_HDMI_VERTA0),
+ HDMI_REG(VC4_HDMI_VERTA1),
+ HDMI_REG(VC4_HDMI_VERTB0),
+ HDMI_REG(VC4_HDMI_VERTB1),
+ HDMI_REG(VC4_HDMI_TX_PHY_RESET_CTL),
+ HDMI_REG(VC4_HDMI_TX_PHY_CTL0),
+
+ HDMI_REG(VC4_HDMI_CEC_CNTRL_1),
+ HDMI_REG(VC4_HDMI_CEC_CNTRL_2),
+ HDMI_REG(VC4_HDMI_CEC_CNTRL_3),
+ HDMI_REG(VC4_HDMI_CEC_CNTRL_4),
+ HDMI_REG(VC4_HDMI_CEC_CNTRL_5),
+ HDMI_REG(VC4_HDMI_CPU_STATUS),
+ HDMI_REG(VC4_HDMI_CPU_MASK_STATUS),
+
+ HDMI_REG(VC4_HDMI_CEC_RX_DATA_1),
+ HDMI_REG(VC4_HDMI_CEC_RX_DATA_2),
+ HDMI_REG(VC4_HDMI_CEC_RX_DATA_3),
+ HDMI_REG(VC4_HDMI_CEC_RX_DATA_4),
+ HDMI_REG(VC4_HDMI_CEC_TX_DATA_1),
+ HDMI_REG(VC4_HDMI_CEC_TX_DATA_2),
+ HDMI_REG(VC4_HDMI_CEC_TX_DATA_3),
+ HDMI_REG(VC4_HDMI_CEC_TX_DATA_4),
+};
+
+static const struct {
+ u32 reg;
+ const char *name;
+} hd_regs[] = {
+ HDMI_REG(VC4_HD_M_CTL),
+ HDMI_REG(VC4_HD_MAI_CTL),
+ HDMI_REG(VC4_HD_MAI_THR),
+ HDMI_REG(VC4_HD_MAI_FMT),
+ HDMI_REG(VC4_HD_MAI_SMP),
+ HDMI_REG(VC4_HD_VID_CTL),
+ HDMI_REG(VC4_HD_CSC_CTL),
+ HDMI_REG(VC4_HD_FRAME_COUNT),
+};
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hdmi_regs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ hdmi_regs[i].name, hdmi_regs[i].reg,
+ HDMI_READ(hdmi_regs[i].reg));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hd_regs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ hd_regs[i].name, hd_regs[i].reg,
+ HD_READ(hd_regs[i].reg));
+ }
+
+ return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static void vc4_hdmi_dump_regs(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hdmi_regs); i++) {
+ DRM_INFO("0x%04x (%s): 0x%08x\n",
+ hdmi_regs[i].reg, hdmi_regs[i].name,
+ HDMI_READ(hdmi_regs[i].reg));
+ }
+ for (i = 0; i < ARRAY_SIZE(hd_regs); i++) {
+ DRM_INFO("0x%04x (%s): 0x%08x\n",
+ hd_regs[i].reg, hd_regs[i].name,
+ HD_READ(hd_regs[i].reg));
+ }
+}
+
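+/* Hotplug detection: prefer a dedicated HPD GPIO when the DT provides
+ * one, otherwise fall back to probing the DDC bus and finally to the
+ * HDMI core's own HOTPLUG status register.
+ */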
+static enum drm_connector_status
+vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
+{
+ struct drm_device *dev = connector->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ if (vc4->hdmi->hpd_gpio) {
+ if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio) ^
+ vc4->hdmi->hpd_active_low)
+ return connector_status_connected;
+ cec_phys_addr_invalidate(vc4->hdmi->cec_adap);
+ return connector_status_disconnected;
+ }
+
+ if (drm_probe_ddc(vc4->hdmi->ddc))
+ return connector_status_connected;
+
+ if (HDMI_READ(VC4_HDMI_HOTPLUG) & VC4_HDMI_HOTPLUG_CONNECTED)
+ return connector_status_connected;
+ cec_phys_addr_invalidate(vc4->hdmi->cec_adap);
+ return connector_status_disconnected;
+}
+
+static void vc4_hdmi_connector_destroy(struct drm_connector *connector)
+{
+ drm_connector_unregister(connector);
+ drm_connector_cleanup(connector);
+}
+
+static int vc4_hdmi_connector_get_modes(struct drm_connector *connector)
+{
+ struct vc4_hdmi_connector *vc4_connector =
+ to_vc4_hdmi_connector(connector);
+ struct drm_encoder *encoder = vc4_connector->encoder;
+ struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder);
+ struct drm_device *dev = connector->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int ret = 0;
+ struct edid *edid;
+
+ edid = drm_get_edid(connector, vc4->hdmi->ddc);
+ cec_s_phys_addr_from_edid(vc4->hdmi->cec_adap, edid);
+ if (!edid)
+ return -ENODEV;
+
+ vc4_encoder->hdmi_monitor = drm_detect_hdmi_monitor(edid);
+
+ if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) {
+ vc4_encoder->rgb_range_selectable =
+ drm_rgb_quant_range_selectable(edid);
+ }
+
+ drm_connector_update_edid_property(connector, edid);
+ ret = drm_add_edid_modes(connector, edid);
+ kfree(edid);
+
+ return ret;
+}
+
+static const struct drm_connector_funcs vc4_hdmi_connector_funcs = {
+ .detect = vc4_hdmi_connector_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = vc4_hdmi_connector_destroy,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_connector_helper_funcs vc4_hdmi_connector_helper_funcs = {
+ .get_modes = vc4_hdmi_connector_get_modes,
+};
+
+static struct drm_connector *vc4_hdmi_connector_init(struct drm_device *dev,
+ struct drm_encoder *encoder)
+{
+ struct drm_connector *connector;
+ struct vc4_hdmi_connector *hdmi_connector;
+
+ hdmi_connector = devm_kzalloc(dev->dev, sizeof(*hdmi_connector),
+ GFP_KERNEL);
+ if (!hdmi_connector)
+ return ERR_PTR(-ENOMEM);
+ connector = &hdmi_connector->base;
+
+ hdmi_connector->encoder = encoder;
+
+ drm_connector_init(dev, connector, &vc4_hdmi_connector_funcs,
+ DRM_MODE_CONNECTOR_HDMIA);
+ drm_connector_helper_add(connector, &vc4_hdmi_connector_helper_funcs);
+
+ connector->polled = (DRM_CONNECTOR_POLL_CONNECT |
+ DRM_CONNECTOR_POLL_DISCONNECT);
+
+ connector->interlace_allowed = 1;
+ connector->doublescan_allowed = 0;
+
+ drm_connector_attach_encoder(connector, encoder);
+
+ return connector;
+}
+
+static void vc4_hdmi_encoder_destroy(struct drm_encoder *encoder)
+{
+ drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs vc4_hdmi_encoder_funcs = {
+ .destroy = vc4_hdmi_encoder_destroy,
+};
+
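+/* Disables the packet's slot in the packet RAM config and waits for the
+ * hardware to report that it has stopped transmitting it, so the slot
+ * can safely be rewritten.
+ */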
+static int vc4_hdmi_stop_packet(struct drm_encoder *encoder,
+ enum hdmi_infoframe_type type)
+{
+ struct drm_device *dev = encoder->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ u32 packet_id = type - 0x80;
+
+ HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG,
+ HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) & ~BIT(packet_id));
+
+ return wait_for(!(HDMI_READ(VC4_HDMI_RAM_PACKET_STATUS) &
+ BIT(packet_id)), 100);
+}
+
+static void vc4_hdmi_write_infoframe(struct drm_encoder *encoder,
+ union hdmi_infoframe *frame)
+{
+ struct drm_device *dev = encoder->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ u32 packet_id = frame->any.type - 0x80;
+ u32 packet_reg = VC4_HDMI_RAM_PACKET(packet_id);
+ uint8_t buffer[VC4_HDMI_PACKET_STRIDE];
+ ssize_t len, i;
+ int ret;
+
+ WARN_ONCE(!(HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) &
+ VC4_HDMI_RAM_PACKET_ENABLE),
+ "Packet RAM has to be on to store the packet.");
+
+ len = hdmi_infoframe_pack(frame, buffer, sizeof(buffer));
+ if (len < 0)
+ return;
+
+ ret = vc4_hdmi_stop_packet(encoder, frame->any.type);
+ if (ret) {
+ DRM_ERROR("Failed to wait for infoframe to go idle: %d\n", ret);
+ return;
+ }
+
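+ /* The packet RAM stores seven infoframe bytes per pair of 32-bit
+ * registers: three bytes in the first word and four in the second.
+ */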
+ for (i = 0; i < len; i += 7) {
+ HDMI_WRITE(packet_reg,
+ buffer[i + 0] << 0 |
+ buffer[i + 1] << 8 |
+ buffer[i + 2] << 16);
+ packet_reg += 4;
+
+ HDMI_WRITE(packet_reg,
+ buffer[i + 3] << 0 |
+ buffer[i + 4] << 8 |
+ buffer[i + 5] << 16 |
+ buffer[i + 6] << 24);
+ packet_reg += 4;
+ }
+
+ HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG,
+ HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) | BIT(packet_id));
+ ret = wait_for((HDMI_READ(VC4_HDMI_RAM_PACKET_STATUS) &
+ BIT(packet_id)), 100);
+ if (ret)
+ DRM_ERROR("Failed to wait for infoframe to start: %d\n", ret);
+}
+
+static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder)
+{
+ struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder);
+ struct drm_crtc *crtc = encoder->crtc;
+ const struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+ union hdmi_infoframe frame;
+ int ret;
+
+ ret = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi, mode, false);
+ if (ret < 0) {
+ DRM_ERROR("couldn't fill AVI infoframe\n");
+ return;
+ }
+
+ drm_hdmi_avi_infoframe_quant_range(&frame.avi, mode,
+ vc4_encoder->limited_rgb_range ?
+ HDMI_QUANTIZATION_RANGE_LIMITED :
+ HDMI_QUANTIZATION_RANGE_FULL,
+ vc4_encoder->rgb_range_selectable,
+ false);
+
+ vc4_hdmi_write_infoframe(encoder, &frame);
+}
+
+static void vc4_hdmi_set_spd_infoframe(struct drm_encoder *encoder)
+{
+ union hdmi_infoframe frame;
+ int ret;
+
+ ret = hdmi_spd_infoframe_init(&frame.spd, "Broadcom", "Videocore");
+ if (ret < 0) {
+ DRM_ERROR("couldn't fill SPD infoframe\n");
+ return;
+ }
+
+ frame.spd.sdi = HDMI_SPD_SDI_PC;
+
+ vc4_hdmi_write_infoframe(encoder, &frame);
+}
+
+static void vc4_hdmi_set_audio_infoframe(struct drm_encoder *encoder)
+{
+ struct drm_device *drm = encoder->dev;
+ struct vc4_dev *vc4 = drm->dev_private;
+ struct vc4_hdmi *hdmi = vc4->hdmi;
+ union hdmi_infoframe frame;
+ int ret;
+
+ ret = hdmi_audio_infoframe_init(&frame.audio);
+
+ frame.audio.coding_type = HDMI_AUDIO_CODING_TYPE_STREAM;
+ frame.audio.sample_frequency = HDMI_AUDIO_SAMPLE_FREQUENCY_STREAM;
+ frame.audio.sample_size = HDMI_AUDIO_SAMPLE_SIZE_STREAM;
+ frame.audio.channels = hdmi->audio.channels;
+
+ vc4_hdmi_write_infoframe(encoder, &frame);
+}
+
+static void vc4_hdmi_set_infoframes(struct drm_encoder *encoder)
+{
+ vc4_hdmi_set_avi_infoframe(encoder);
+ vc4_hdmi_set_spd_infoframe(encoder);
+}
+
+static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_hdmi *hdmi = vc4->hdmi;
+ int ret;
+
+ HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0);
+
+ HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16);
+ HD_WRITE(VC4_HD_VID_CTL,
+ HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE);
+
+ clk_disable_unprepare(hdmi->pixel_clock);
+
+ ret = pm_runtime_put(&hdmi->pdev->dev);
+ if (ret < 0)
+ DRM_ERROR("Failed to release power domain: %d\n", ret);
+}
+
+static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder)
+{
+ struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
+ struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder);
+ struct drm_device *dev = encoder->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_hdmi *hdmi = vc4->hdmi;
+ bool debug_dump_regs = false;
+ bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC;
+ bool vsync_pos = mode->flags & DRM_MODE_FLAG_PVSYNC;
+ bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE;
+ u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1;
+ u32 verta = (VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start,
+ VC4_HDMI_VERTA_VSP) |
+ VC4_SET_FIELD(mode->crtc_vsync_start - mode->crtc_vdisplay,
+ VC4_HDMI_VERTA_VFP) |
+ VC4_SET_FIELD(mode->crtc_vdisplay, VC4_HDMI_VERTA_VAL));
+ u32 vertb = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) |
+ VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end,
+ VC4_HDMI_VERTB_VBP));
+ u32 vertb_even = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) |
+ VC4_SET_FIELD(mode->crtc_vtotal -
+ mode->crtc_vsync_end -
+ interlaced,
+ VC4_HDMI_VERTB_VBP));
+ u32 csc_ctl;
+ int ret;
+
+ ret = pm_runtime_get_sync(&hdmi->pdev->dev);
+ if (ret < 0) {
+ DRM_ERROR("Failed to retain power domain: %d\n", ret);
+ return;
+ }
+
+ ret = clk_set_rate(hdmi->pixel_clock,
+ mode->clock * 1000 *
+ ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1));
+ if (ret) {
+ DRM_ERROR("Failed to set pixel clock rate: %d\n", ret);
+ return;
+ }
+
+ ret = clk_prepare_enable(hdmi->pixel_clock);
+ if (ret) {
+ DRM_ERROR("Failed to turn on pixel clock: %d\n", ret);
+ return;
+ }
+
+ HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL,
+ VC4_HDMI_SW_RESET_HDMI |
+ VC4_HDMI_SW_RESET_FORMAT_DETECT);
+
+ HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0);
+
+ /* PHY should be in reset, like
+ * vc4_hdmi_encoder_disable() does.
+ */
+ HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16);
+
+ HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0);
+
+ if (debug_dump_regs) {
+ DRM_INFO("HDMI regs before:\n");
+ vc4_hdmi_dump_regs(dev);
+ }
+
+ HD_WRITE(VC4_HD_VID_CTL, 0);
+
+ HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL,
+ HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) |
+ VC4_HDMI_SCHEDULER_CONTROL_MANUAL_FORMAT |
+ VC4_HDMI_SCHEDULER_CONTROL_IGNORE_VSYNC_PREDICTS);
+
+ HDMI_WRITE(VC4_HDMI_HORZA,
+ (vsync_pos ? VC4_HDMI_HORZA_VPOS : 0) |
+ (hsync_pos ? VC4_HDMI_HORZA_HPOS : 0) |
+ VC4_SET_FIELD(mode->hdisplay * pixel_rep,
+ VC4_HDMI_HORZA_HAP));
+
+ HDMI_WRITE(VC4_HDMI_HORZB,
+ VC4_SET_FIELD((mode->htotal -
+ mode->hsync_end) * pixel_rep,
+ VC4_HDMI_HORZB_HBP) |
+ VC4_SET_FIELD((mode->hsync_end -
+ mode->hsync_start) * pixel_rep,
+ VC4_HDMI_HORZB_HSP) |
+ VC4_SET_FIELD((mode->hsync_start -
+ mode->hdisplay) * pixel_rep,
+ VC4_HDMI_HORZB_HFP));
+
+ HDMI_WRITE(VC4_HDMI_VERTA0, verta);
+ HDMI_WRITE(VC4_HDMI_VERTA1, verta);
+
+ HDMI_WRITE(VC4_HDMI_VERTB0, vertb_even);
+ HDMI_WRITE(VC4_HDMI_VERTB1, vertb);
+
+ HD_WRITE(VC4_HD_VID_CTL,
+ (vsync_pos ? 0 : VC4_HD_VID_CTL_VSYNC_LOW) |
+ (hsync_pos ? 0 : VC4_HD_VID_CTL_HSYNC_LOW));
+
+ csc_ctl = VC4_SET_FIELD(VC4_HD_CSC_CTL_ORDER_BGR,
+ VC4_HD_CSC_CTL_ORDER);
+
+ if (vc4_encoder->hdmi_monitor &&
+ drm_default_rgb_quant_range(mode) ==
+ HDMI_QUANTIZATION_RANGE_LIMITED) {
+ /* CEA VICs other than #1 require limited range RGB
+ * output unless overridden by an AVI infoframe.
+ * Apply a colorspace conversion to squash 0-255 down
+ * to 16-235. The matrix here is:
+ *
+ * [ 0 0 0.8594 16]
+ * [ 0 0.8594 0 16]
+ * [ 0.8594 0 0 16]
+ * [ 0 0 0 1]
+ */
+ csc_ctl |= VC4_HD_CSC_CTL_ENABLE;
+ csc_ctl |= VC4_HD_CSC_CTL_RGB2YCC;
+ csc_ctl |= VC4_SET_FIELD(VC4_HD_CSC_CTL_MODE_CUSTOM,
+ VC4_HD_CSC_CTL_MODE);
+
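+ /* The coefficient encoding isn't documented here, but 0x6e0 / 2048
+ * is 0.859375, matching the 0.8594 scale in the matrix above, and
+ * 0x100 appears to encode the +16 offset column.
+ */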
+ HD_WRITE(VC4_HD_CSC_12_11, (0x000 << 16) | 0x000);
+ HD_WRITE(VC4_HD_CSC_14_13, (0x100 << 16) | 0x6e0);
+ HD_WRITE(VC4_HD_CSC_22_21, (0x6e0 << 16) | 0x000);
+ HD_WRITE(VC4_HD_CSC_24_23, (0x100 << 16) | 0x000);
+ HD_WRITE(VC4_HD_CSC_32_31, (0x000 << 16) | 0x6e0);
+ HD_WRITE(VC4_HD_CSC_34_33, (0x100 << 16) | 0x000);
+ vc4_encoder->limited_rgb_range = true;
+ } else {
+ vc4_encoder->limited_rgb_range = false;
+ }
+
+ /* The RGB order applies even when CSC is disabled. */
+ HD_WRITE(VC4_HD_CSC_CTL, csc_ctl);
+
+ HDMI_WRITE(VC4_HDMI_FIFO_CTL, VC4_HDMI_FIFO_CTL_MASTER_SLAVE_N);
+
+ if (debug_dump_regs) {
+ DRM_INFO("HDMI regs after:\n");
+ vc4_hdmi_dump_regs(dev);
+ }
+
+ HD_WRITE(VC4_HD_VID_CTL,
+ HD_READ(VC4_HD_VID_CTL) |
+ VC4_HD_VID_CTL_ENABLE |
+ VC4_HD_VID_CTL_UNDERFLOW_ENABLE |
+ VC4_HD_VID_CTL_FRAME_COUNTER_RESET);
+
+ if (vc4_encoder->hdmi_monitor) {
+ HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL,
+ HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) |
+ VC4_HDMI_SCHEDULER_CONTROL_MODE_HDMI);
+
+ ret = wait_for(HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) &
+ VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE, 1000);
+ WARN_ONCE(ret, "Timeout waiting for "
+ "VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE\n");
+ } else {
+ HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG,
+ HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) &
+ ~(VC4_HDMI_RAM_PACKET_ENABLE));
+ HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL,
+ HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) &
+ ~VC4_HDMI_SCHEDULER_CONTROL_MODE_HDMI);
+
+ ret = wait_for(!(HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) &
+ VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE), 1000);
+ WARN_ONCE(ret, "Timeout waiting for "
+ "!VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE\n");
+ }
+
+ if (vc4_encoder->hdmi_monitor) {
+ u32 drift;
+
+ WARN_ON(!(HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) &
+ VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE));
+ HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL,
+ HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) |
+ VC4_HDMI_SCHEDULER_CONTROL_VERT_ALWAYS_KEEPOUT);
+
+ HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG,
+ VC4_HDMI_RAM_PACKET_ENABLE);
+
+ vc4_hdmi_set_infoframes(encoder);
+
+ drift = HDMI_READ(VC4_HDMI_FIFO_CTL);
+ drift &= VC4_HDMI_FIFO_VALID_WRITE_MASK;
+
+ HDMI_WRITE(VC4_HDMI_FIFO_CTL,
+ drift & ~VC4_HDMI_FIFO_CTL_RECENTER);
+ HDMI_WRITE(VC4_HDMI_FIFO_CTL,
+ drift | VC4_HDMI_FIFO_CTL_RECENTER);
+ usleep_range(1000, 1100);
+ HDMI_WRITE(VC4_HDMI_FIFO_CTL,
+ drift & ~VC4_HDMI_FIFO_CTL_RECENTER);
+ HDMI_WRITE(VC4_HDMI_FIFO_CTL,
+ drift | VC4_HDMI_FIFO_CTL_RECENTER);
+
+ ret = wait_for(HDMI_READ(VC4_HDMI_FIFO_CTL) &
+ VC4_HDMI_FIFO_CTL_RECENTER_DONE, 1);
+ WARN_ONCE(ret, "Timeout waiting for "
+ "VC4_HDMI_FIFO_CTL_RECENTER_DONE");
+ }
+}
+
+static enum drm_mode_status
+vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode)
+{
+ /*
+ * As stated in RPi's vc4 firmware, "HDMI state machine (HSM) clock must
+ * be faster than pixel clock, infinitesimally faster, tested in
+ * simulation. Otherwise, exact value is unimportant for HDMI
+ * operation." This conflicts with bcm2835's vc4 documentation, which
+ * states HSM's clock has to be at least 108% of the pixel clock.
+ *
+ * Real-life tests reveal that vc4's firmware statement holds up, and
+ * users are able to use pixel clocks closer to HSM's, namely for
+ * 1920x1200@60Hz. So it was decided to leave a 1% margin between
+ * both clocks, which for RPi0-3 implies a maximum pixel clock of
+ * 162MHz.
+ *
+ * Additionally, the AXI clock needs to be at least 25% of
+ * pixel clock, but HSM ends up being the limiting factor.
+ */
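+ /* mode->clock is in kHz, so dividing the HSM rate (in Hz) by 1010
+ * performs the Hz-to-kHz conversion and applies the 1% margin in a
+ * single step, rejecting anything above the ~162MHz limit noted above.
+ */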
+ if (mode->clock > HSM_CLOCK_FREQ / (1000 * 101 / 100))
+ return MODE_CLOCK_HIGH;
+
+ return MODE_OK;
+}
+
+static const struct drm_encoder_helper_funcs vc4_hdmi_encoder_helper_funcs = {
+ .mode_valid = vc4_hdmi_encoder_mode_valid,
+ .disable = vc4_hdmi_encoder_disable,
+ .enable = vc4_hdmi_encoder_enable,
+};
+
+/* HDMI audio codec callbacks */
+static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *hdmi)
+{
+ struct drm_device *drm = hdmi->encoder->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ u32 hsm_clock = clk_get_rate(hdmi->hsm_clock);
+ unsigned long n, m;
+
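+ /* Pick N/M so that N/M best approximates hsm_clock / samplerate while
+ * still fitting in the MAI_SMP register fields.
+ */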
+ rational_best_approximation(hsm_clock, hdmi->audio.samplerate,
+ VC4_HD_MAI_SMP_N_MASK >>
+ VC4_HD_MAI_SMP_N_SHIFT,
+ (VC4_HD_MAI_SMP_M_MASK >>
+ VC4_HD_MAI_SMP_M_SHIFT) + 1,
+ &n, &m);
+
+ HD_WRITE(VC4_HD_MAI_SMP,
+ VC4_SET_FIELD(n, VC4_HD_MAI_SMP_N) |
+ VC4_SET_FIELD(m - 1, VC4_HD_MAI_SMP_M));
+}
+
+static void vc4_hdmi_set_n_cts(struct vc4_hdmi *hdmi)
+{
+ struct drm_encoder *encoder = hdmi->encoder;
+ struct drm_crtc *crtc = encoder->crtc;
+ struct drm_device *drm = encoder->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ const struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+ u32 samplerate = hdmi->audio.samplerate;
+ u32 n, cts;
+ u64 tmp;
+
+ n = 128 * samplerate / 1000;
+ tmp = (u64)(mode->clock * 1000) * n;
+ do_div(tmp, 128 * samplerate);
+ cts = tmp;
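+ /* For example, 48 kHz audio gives n = 6144, and with a 74.25 MHz
+ * pixel clock (720p/1080i timings) cts = 74250000 * 6144 /
+ * (128 * 48000) = 74250.
+ */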
+
+ HDMI_WRITE(VC4_HDMI_CRP_CFG,
+ VC4_HDMI_CRP_CFG_EXTERNAL_CTS_EN |
+ VC4_SET_FIELD(n, VC4_HDMI_CRP_CFG_N));
+
+ /*
+ * We could get slightly more accurate clocks in some cases by
+ * providing a CTS_1 value. The hardware alternates between the two
+ * CTS values based on the period fields.
+ */
+ HDMI_WRITE(VC4_HDMI_CTS_0, cts);
+ HDMI_WRITE(VC4_HDMI_CTS_1, cts);
+}
+
+static inline struct vc4_hdmi *dai_to_hdmi(struct snd_soc_dai *dai)
+{
+ struct snd_soc_card *card = snd_soc_dai_get_drvdata(dai);
+
+ return snd_soc_card_get_drvdata(card);
+}
+
+static int vc4_hdmi_audio_startup(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct vc4_hdmi *hdmi = dai_to_hdmi(dai);
+ struct drm_encoder *encoder = hdmi->encoder;
+ struct vc4_dev *vc4 = to_vc4_dev(encoder->dev);
+ int ret;
+
+ if (hdmi->audio.substream && hdmi->audio.substream != substream)
+ return -EINVAL;
+
+ hdmi->audio.substream = substream;
+
+ /*
+ * If the HDMI encoder hasn't probed, or the encoder is
+ * currently in DVI mode, treat the codec dai as missing.
+ */
+ if (!encoder->crtc || !(HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) &
+ VC4_HDMI_RAM_PACKET_ENABLE))
+ return -ENODEV;
+
+ ret = snd_pcm_hw_constraint_eld(substream->runtime,
+ hdmi->connector->eld);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int vc4_hdmi_audio_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+ return 0;
+}
+
+static void vc4_hdmi_audio_reset(struct vc4_hdmi *hdmi)
+{
+ struct drm_encoder *encoder = hdmi->encoder;
+ struct drm_device *drm = encoder->dev;
+ struct device *dev = &hdmi->pdev->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ int ret;
+
+ ret = vc4_hdmi_stop_packet(encoder, HDMI_INFOFRAME_TYPE_AUDIO);
+ if (ret)
+ dev_err(dev, "Failed to stop audio infoframe: %d\n", ret);
+
+ HD_WRITE(VC4_HD_MAI_CTL, VC4_HD_MAI_CTL_RESET);
+ HD_WRITE(VC4_HD_MAI_CTL, VC4_HD_MAI_CTL_ERRORF);
+ HD_WRITE(VC4_HD_MAI_CTL, VC4_HD_MAI_CTL_FLUSH);
+}
+
+static void vc4_hdmi_audio_shutdown(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct vc4_hdmi *hdmi = dai_to_hdmi(dai);
+
+ if (substream != hdmi->audio.substream)
+ return;
+
+ vc4_hdmi_audio_reset(hdmi);
+
+ hdmi->audio.substream = NULL;
+}
+
+/* HDMI audio codec callbacks */
+static int vc4_hdmi_audio_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct vc4_hdmi *hdmi = dai_to_hdmi(dai);
+ struct drm_encoder *encoder = hdmi->encoder;
+ struct drm_device *drm = encoder->dev;
+ struct device *dev = &hdmi->pdev->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ u32 audio_packet_config, channel_mask;
+ u32 channel_map, i;
+
+ if (substream != hdmi->audio.substream)
+ return -EINVAL;
+
+ dev_dbg(dev, "%s: %u Hz, %d bit, %d channels\n", __func__,
+ params_rate(params), params_width(params),
+ params_channels(params));
+
+ hdmi->audio.channels = params_channels(params);
+ hdmi->audio.samplerate = params_rate(params);
+
+ HD_WRITE(VC4_HD_MAI_CTL,
+ VC4_HD_MAI_CTL_RESET |
+ VC4_HD_MAI_CTL_FLUSH |
+ VC4_HD_MAI_CTL_DLATE |
+ VC4_HD_MAI_CTL_ERRORE |
+ VC4_HD_MAI_CTL_ERRORF);
+
+ vc4_hdmi_audio_set_mai_clock(hdmi);
+
+ audio_packet_config =
+ VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_SAMPLE_FLAT |
+ VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_INACTIVE_CHANNELS |
+ VC4_SET_FIELD(0xf, VC4_HDMI_AUDIO_PACKET_B_FRAME_IDENTIFIER);
+
+ channel_mask = GENMASK(hdmi->audio.channels - 1, 0);
+ audio_packet_config |= VC4_SET_FIELD(channel_mask,
+ VC4_HDMI_AUDIO_PACKET_CEA_MASK);
+
+ /* Set the MAI threshold. This logic mimics the firmware's. */
+ if (hdmi->audio.samplerate > 96000) {
+ HD_WRITE(VC4_HD_MAI_THR,
+ VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQHIGH) |
+ VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQLOW));
+ } else if (hdmi->audio.samplerate > 48000) {
+ HD_WRITE(VC4_HD_MAI_THR,
+ VC4_SET_FIELD(0x14, VC4_HD_MAI_THR_DREQHIGH) |
+ VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQLOW));
+ } else {
+ HD_WRITE(VC4_HD_MAI_THR,
+ VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICHIGH) |
+ VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICLOW) |
+ VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQHIGH) |
+ VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQLOW));
+ }
+
+ HDMI_WRITE(VC4_HDMI_MAI_CONFIG,
+ VC4_HDMI_MAI_CONFIG_BIT_REVERSE |
+ VC4_SET_FIELD(channel_mask, VC4_HDMI_MAI_CHANNEL_MASK));
+
+ channel_map = 0;
+ for (i = 0; i < 8; i++) {
+ if (channel_mask & BIT(i))
+ channel_map |= i << (3 * i);
+ }
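+ /* Each active channel's index occupies a 3-bit field, so a stereo
+ * stream (channel_mask = 0x3) produces channel_map = 0x8: channel 0
+ * in bits 2:0 and channel 1 in bits 5:3.
+ */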
+
+ HDMI_WRITE(VC4_HDMI_MAI_CHANNEL_MAP, channel_map);
+ HDMI_WRITE(VC4_HDMI_AUDIO_PACKET_CONFIG, audio_packet_config);
+ vc4_hdmi_set_n_cts(hdmi);
+
+ return 0;
+}
+
+static int vc4_hdmi_audio_trigger(struct snd_pcm_substream *substream, int cmd,
+ struct snd_soc_dai *dai)
+{
+ struct vc4_hdmi *hdmi = dai_to_hdmi(dai);
+ struct drm_encoder *encoder = hdmi->encoder;
+ struct drm_device *drm = encoder->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ vc4_hdmi_set_audio_infoframe(encoder);
+ HDMI_WRITE(VC4_HDMI_TX_PHY_CTL0,
+ HDMI_READ(VC4_HDMI_TX_PHY_CTL0) &
+ ~VC4_HDMI_TX_PHY_RNG_PWRDN);
+ HD_WRITE(VC4_HD_MAI_CTL,
+ VC4_SET_FIELD(hdmi->audio.channels,
+ VC4_HD_MAI_CTL_CHNUM) |
+ VC4_HD_MAI_CTL_ENABLE);
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ HD_WRITE(VC4_HD_MAI_CTL,
+ VC4_HD_MAI_CTL_DLATE |
+ VC4_HD_MAI_CTL_ERRORE |
+ VC4_HD_MAI_CTL_ERRORF);
+ HDMI_WRITE(VC4_HDMI_TX_PHY_CTL0,
+ HDMI_READ(VC4_HDMI_TX_PHY_CTL0) |
+ VC4_HDMI_TX_PHY_RNG_PWRDN);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static inline struct vc4_hdmi *
+snd_component_to_hdmi(struct snd_soc_component *component)
+{
+ struct snd_soc_card *card = snd_soc_component_get_drvdata(component);
+
+ return snd_soc_card_get_drvdata(card);
+}
+
+static int vc4_hdmi_audio_eld_ctl_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct vc4_hdmi *hdmi = snd_component_to_hdmi(component);
+
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES;
+ uinfo->count = sizeof(hdmi->connector->eld);
+
+ return 0;
+}
+
+static int vc4_hdmi_audio_eld_ctl_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct vc4_hdmi *hdmi = snd_component_to_hdmi(component);
+
+ memcpy(ucontrol->value.bytes.data, hdmi->connector->eld,
+ sizeof(hdmi->connector->eld));
+
+ return 0;
+}
+
+static const struct snd_kcontrol_new vc4_hdmi_audio_controls[] = {
+ {
+ .access = SNDRV_CTL_ELEM_ACCESS_READ |
+ SNDRV_CTL_ELEM_ACCESS_VOLATILE,
+ .iface = SNDRV_CTL_ELEM_IFACE_PCM,
+ .name = "ELD",
+ .info = vc4_hdmi_audio_eld_ctl_info,
+ .get = vc4_hdmi_audio_eld_ctl_get,
+ },
+};
+
+static const struct snd_soc_dapm_widget vc4_hdmi_audio_widgets[] = {
+ SND_SOC_DAPM_OUTPUT("TX"),
+};
+
+static const struct snd_soc_dapm_route vc4_hdmi_audio_routes[] = {
+ { "TX", NULL, "Playback" },
+};
+
+static const struct snd_soc_component_driver vc4_hdmi_audio_component_drv = {
+ .controls = vc4_hdmi_audio_controls,
+ .num_controls = ARRAY_SIZE(vc4_hdmi_audio_controls),
+ .dapm_widgets = vc4_hdmi_audio_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(vc4_hdmi_audio_widgets),
+ .dapm_routes = vc4_hdmi_audio_routes,
+ .num_dapm_routes = ARRAY_SIZE(vc4_hdmi_audio_routes),
+ .idle_bias_on = 1,
+ .use_pmdown_time = 1,
+ .endianness = 1,
+ .non_legacy_dai_naming = 1,
+};
+
+static const struct snd_soc_dai_ops vc4_hdmi_audio_dai_ops = {
+ .startup = vc4_hdmi_audio_startup,
+ .shutdown = vc4_hdmi_audio_shutdown,
+ .hw_params = vc4_hdmi_audio_hw_params,
+ .set_fmt = vc4_hdmi_audio_set_fmt,
+ .trigger = vc4_hdmi_audio_trigger,
+};
+
+static struct snd_soc_dai_driver vc4_hdmi_audio_codec_dai_drv = {
+ .name = "vc4-hdmi-hifi",
+ .playback = {
+ .stream_name = "Playback",
+ .channels_min = 2,
+ .channels_max = 8,
+ .rates = SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 |
+ SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |
+ SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 |
+ SNDRV_PCM_RATE_192000,
+ .formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE,
+ },
+};
+
+static const struct snd_soc_component_driver vc4_hdmi_audio_cpu_dai_comp = {
+ .name = "vc4-hdmi-cpu-dai-component",
+};
+
+static int vc4_hdmi_audio_cpu_dai_probe(struct snd_soc_dai *dai)
+{
+ struct vc4_hdmi *hdmi = dai_to_hdmi(dai);
+
+ snd_soc_dai_init_dma_data(dai, &hdmi->audio.dma_data, NULL);
+
+ return 0;
+}
+
+static struct snd_soc_dai_driver vc4_hdmi_audio_cpu_dai_drv = {
+ .name = "vc4-hdmi-cpu-dai",
+ .probe = vc4_hdmi_audio_cpu_dai_probe,
+ .playback = {
+ .stream_name = "Playback",
+ .channels_min = 1,
+ .channels_max = 8,
+ .rates = SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 |
+ SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 |
+ SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 |
+ SNDRV_PCM_RATE_192000,
+ .formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE,
+ },
+ .ops = &vc4_hdmi_audio_dai_ops,
+};
+
+static const struct snd_dmaengine_pcm_config pcm_conf = {
+ .chan_names[SNDRV_PCM_STREAM_PLAYBACK] = "audio-rx",
+ .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config,
+};
+
+static int vc4_hdmi_audio_init(struct vc4_hdmi *hdmi)
+{
+ struct snd_soc_dai_link *dai_link = &hdmi->audio.link;
+ struct snd_soc_card *card = &hdmi->audio.card;
+ struct device *dev = &hdmi->pdev->dev;
+ const __be32 *addr;
+ int ret;
+
+ if (!of_find_property(dev->of_node, "dmas", NULL)) {
+ dev_warn(dev,
+ "'dmas' DT property is missing, no HDMI audio\n");
+ return 0;
+ }
+
+ /*
+ * Get the physical address of VC4_HD_MAI_DATA. We need to retrieve
+ * the bus address specified in the DT, because the physical address
+ * (the one returned by platform_get_resource()) is not appropriate
+ * for DMA transfers.
+ * This VC/MMU should probably be exposed to avoid this kind of hack.
+ */
+ addr = of_get_address(dev->of_node, 1, NULL, NULL);
+ hdmi->audio.dma_data.addr = be32_to_cpup(addr) + VC4_HD_MAI_DATA;
+ hdmi->audio.dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ hdmi->audio.dma_data.maxburst = 2;
+
+ ret = devm_snd_dmaengine_pcm_register(dev, &pcm_conf, 0);
+ if (ret) {
+ dev_err(dev, "Could not register PCM component: %d\n", ret);
+ return ret;
+ }
+
+ ret = devm_snd_soc_register_component(dev, &vc4_hdmi_audio_cpu_dai_comp,
+ &vc4_hdmi_audio_cpu_dai_drv, 1);
+ if (ret) {
+ dev_err(dev, "Could not register CPU DAI: %d\n", ret);
+ return ret;
+ }
+
+ /* register component and codec dai */
+ ret = devm_snd_soc_register_component(dev, &vc4_hdmi_audio_component_drv,
+ &vc4_hdmi_audio_codec_dai_drv, 1);
+ if (ret) {
+ dev_err(dev, "Could not register component: %d\n", ret);
+ return ret;
+ }
+
+ dai_link->name = "MAI";
+ dai_link->stream_name = "MAI PCM";
+ dai_link->codec_dai_name = vc4_hdmi_audio_codec_dai_drv.name;
+ dai_link->cpu_dai_name = dev_name(dev);
+ dai_link->codec_name = dev_name(dev);
+ dai_link->platform_name = dev_name(dev);
+
+ card->dai_link = dai_link;
+ card->num_links = 1;
+ card->name = "vc4-hdmi";
+ card->dev = dev;
+ card->owner = THIS_MODULE;
+
+ /*
+ * Be careful, snd_soc_register_card() calls dev_set_drvdata() and
+ * stores a pointer to the snd card object in dev->driver_data. This
+ * means we cannot use it for something else. The hdmi back-pointer is
+ * now stored in card->drvdata and should be retrieved with
+ * snd_soc_card_get_drvdata() if needed.
+ */
+ snd_soc_card_set_drvdata(card, hdmi);
+ ret = devm_snd_soc_register_card(dev, card);
+ if (ret)
+ dev_err(dev, "Could not register sound card: %d\n", ret);
+
+ return ret;
+}
+
+#ifdef CONFIG_DRM_VC4_HDMI_CEC
+static irqreturn_t vc4_cec_irq_handler_thread(int irq, void *priv)
+{
+ struct vc4_dev *vc4 = priv;
+ struct vc4_hdmi *hdmi = vc4->hdmi;
+
+ if (hdmi->cec_irq_was_rx) {
+ if (hdmi->cec_rx_msg.len)
+ cec_received_msg(hdmi->cec_adap, &hdmi->cec_rx_msg);
+ } else if (hdmi->cec_tx_ok) {
+ cec_transmit_done(hdmi->cec_adap, CEC_TX_STATUS_OK,
+ 0, 0, 0, 0);
+ } else {
+ /*
+ * This CEC implementation makes 1 retry, so if we
+ * get a NACK, then that means it made 2 attempts.
+ */
+ cec_transmit_done(hdmi->cec_adap, CEC_TX_STATUS_NACK,
+ 0, 2, 0, 0);
+ }
+ return IRQ_HANDLED;
+}
+
+static void vc4_cec_read_msg(struct vc4_dev *vc4, u32 cntrl1)
+{
+ struct cec_msg *msg = &vc4->hdmi->cec_rx_msg;
+ unsigned int i;
+
+ msg->len = 1 + ((cntrl1 & VC4_HDMI_CEC_REC_WRD_CNT_MASK) >>
+ VC4_HDMI_CEC_REC_WRD_CNT_SHIFT);
+ for (i = 0; i < msg->len; i += 4) {
+ u32 val = HDMI_READ(VC4_HDMI_CEC_RX_DATA_1 + i);
+
+ msg->msg[i] = val & 0xff;
+ msg->msg[i + 1] = (val >> 8) & 0xff;
+ msg->msg[i + 2] = (val >> 16) & 0xff;
+ msg->msg[i + 3] = (val >> 24) & 0xff;
+ }
+}
+
+static irqreturn_t vc4_cec_irq_handler(int irq, void *priv)
+{
+ struct vc4_dev *vc4 = priv;
+ struct vc4_hdmi *hdmi = vc4->hdmi;
+ u32 stat = HDMI_READ(VC4_HDMI_CPU_STATUS);
+ u32 cntrl1, cntrl5;
+
+ if (!(stat & VC4_HDMI_CPU_CEC))
+ return IRQ_NONE;
+ hdmi->cec_rx_msg.len = 0;
+ cntrl1 = HDMI_READ(VC4_HDMI_CEC_CNTRL_1);
+ cntrl5 = HDMI_READ(VC4_HDMI_CEC_CNTRL_5);
+ hdmi->cec_irq_was_rx = cntrl5 & VC4_HDMI_CEC_RX_CEC_INT;
+ if (hdmi->cec_irq_was_rx) {
+ vc4_cec_read_msg(vc4, cntrl1);
+ cntrl1 |= VC4_HDMI_CEC_CLEAR_RECEIVE_OFF;
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, cntrl1);
+ cntrl1 &= ~VC4_HDMI_CEC_CLEAR_RECEIVE_OFF;
+ } else {
+ hdmi->cec_tx_ok = cntrl1 & VC4_HDMI_CEC_TX_STATUS_GOOD;
+ cntrl1 &= ~VC4_HDMI_CEC_START_XMIT_BEGIN;
+ }
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, cntrl1);
+ HDMI_WRITE(VC4_HDMI_CPU_CLEAR, VC4_HDMI_CPU_CEC);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static int vc4_hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable)
+{
+ struct vc4_dev *vc4 = cec_get_drvdata(adap);
+ /* clock period in microseconds */
+ const u32 usecs = 1000000 / CEC_CLOCK_FREQ;
+ u32 val = HDMI_READ(VC4_HDMI_CEC_CNTRL_5);
+
+ val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET |
+ VC4_HDMI_CEC_CNT_TO_4700_US_MASK |
+ VC4_HDMI_CEC_CNT_TO_4500_US_MASK);
+ val |= ((4700 / usecs) << VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT) |
+ ((4500 / usecs) << VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT);
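+ /* Assuming CEC_CLOCK_FREQ is the 40 kHz rate mentioned in
+ * vc4_hdmi_bind(), usecs is 25, so e.g. the 4700 us timeout above
+ * becomes a count of 188 CEC clock ticks.
+ */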
+
+ if (enable) {
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_5, val |
+ VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET);
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_5, val);
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_2,
+ ((1500 / usecs) << VC4_HDMI_CEC_CNT_TO_1500_US_SHIFT) |
+ ((1300 / usecs) << VC4_HDMI_CEC_CNT_TO_1300_US_SHIFT) |
+ ((800 / usecs) << VC4_HDMI_CEC_CNT_TO_800_US_SHIFT) |
+ ((600 / usecs) << VC4_HDMI_CEC_CNT_TO_600_US_SHIFT) |
+ ((400 / usecs) << VC4_HDMI_CEC_CNT_TO_400_US_SHIFT));
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_3,
+ ((2750 / usecs) << VC4_HDMI_CEC_CNT_TO_2750_US_SHIFT) |
+ ((2400 / usecs) << VC4_HDMI_CEC_CNT_TO_2400_US_SHIFT) |
+ ((2050 / usecs) << VC4_HDMI_CEC_CNT_TO_2050_US_SHIFT) |
+ ((1700 / usecs) << VC4_HDMI_CEC_CNT_TO_1700_US_SHIFT));
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_4,
+ ((4300 / usecs) << VC4_HDMI_CEC_CNT_TO_4300_US_SHIFT) |
+ ((3900 / usecs) << VC4_HDMI_CEC_CNT_TO_3900_US_SHIFT) |
+ ((3600 / usecs) << VC4_HDMI_CEC_CNT_TO_3600_US_SHIFT) |
+ ((3500 / usecs) << VC4_HDMI_CEC_CNT_TO_3500_US_SHIFT));
+
+ HDMI_WRITE(VC4_HDMI_CPU_MASK_CLEAR, VC4_HDMI_CPU_CEC);
+ } else {
+ HDMI_WRITE(VC4_HDMI_CPU_MASK_SET, VC4_HDMI_CPU_CEC);
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_5, val |
+ VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET);
+ }
+ return 0;
+}
+
+static int vc4_hdmi_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr)
+{
+ struct vc4_dev *vc4 = cec_get_drvdata(adap);
+
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1,
+ (HDMI_READ(VC4_HDMI_CEC_CNTRL_1) & ~VC4_HDMI_CEC_ADDR_MASK) |
+ (log_addr & 0xf) << VC4_HDMI_CEC_ADDR_SHIFT);
+ return 0;
+}
+
+static int vc4_hdmi_cec_adap_transmit(struct cec_adapter *adap, u8 attempts,
+ u32 signal_free_time, struct cec_msg *msg)
+{
+ struct vc4_dev *vc4 = cec_get_drvdata(adap);
+ u32 val;
+ unsigned int i;
+
+ for (i = 0; i < msg->len; i += 4)
+ HDMI_WRITE(VC4_HDMI_CEC_TX_DATA_1 + i,
+ (msg->msg[i]) |
+ (msg->msg[i + 1] << 8) |
+ (msg->msg[i + 2] << 16) |
+ (msg->msg[i + 3] << 24));
+
+ val = HDMI_READ(VC4_HDMI_CEC_CNTRL_1);
+ val &= ~VC4_HDMI_CEC_START_XMIT_BEGIN;
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, val);
+ val &= ~VC4_HDMI_CEC_MESSAGE_LENGTH_MASK;
+ val |= (msg->len - 1) << VC4_HDMI_CEC_MESSAGE_LENGTH_SHIFT;
+ val |= VC4_HDMI_CEC_START_XMIT_BEGIN;
+
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, val);
+ return 0;
+}
+
+static const struct cec_adap_ops vc4_hdmi_cec_adap_ops = {
+ .adap_enable = vc4_hdmi_cec_adap_enable,
+ .adap_log_addr = vc4_hdmi_cec_adap_log_addr,
+ .adap_transmit = vc4_hdmi_cec_adap_transmit,
+};
+#endif
+
+static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = drm->dev_private;
+ struct vc4_hdmi *hdmi;
+ struct vc4_hdmi_encoder *vc4_hdmi_encoder;
+ struct device_node *ddc_node;
+ u32 value;
+ int ret;
+
+ hdmi = devm_kzalloc(dev, sizeof(*hdmi), GFP_KERNEL);
+ if (!hdmi)
+ return -ENOMEM;
+
+ vc4_hdmi_encoder = devm_kzalloc(dev, sizeof(*vc4_hdmi_encoder),
+ GFP_KERNEL);
+ if (!vc4_hdmi_encoder)
+ return -ENOMEM;
+ vc4_hdmi_encoder->base.type = VC4_ENCODER_TYPE_HDMI;
+ hdmi->encoder = &vc4_hdmi_encoder->base.base;
+
+ hdmi->pdev = pdev;
+ hdmi->hdmicore_regs = vc4_ioremap_regs(pdev, 0);
+ if (IS_ERR(hdmi->hdmicore_regs))
+ return PTR_ERR(hdmi->hdmicore_regs);
+
+ hdmi->hd_regs = vc4_ioremap_regs(pdev, 1);
+ if (IS_ERR(hdmi->hd_regs))
+ return PTR_ERR(hdmi->hd_regs);
+
+ hdmi->pixel_clock = devm_clk_get(dev, "pixel");
+ if (IS_ERR(hdmi->pixel_clock)) {
+ DRM_ERROR("Failed to get pixel clock\n");
+ return PTR_ERR(hdmi->pixel_clock);
+ }
+ hdmi->hsm_clock = devm_clk_get(dev, "hdmi");
+ if (IS_ERR(hdmi->hsm_clock)) {
+ DRM_ERROR("Failed to get HDMI state machine clock\n");
+ return PTR_ERR(hdmi->hsm_clock);
+ }
+
+ ddc_node = of_parse_phandle(dev->of_node, "ddc", 0);
+ if (!ddc_node) {
+ DRM_ERROR("Failed to find ddc node in device tree\n");
+ return -ENODEV;
+ }
+
+ hdmi->ddc = of_find_i2c_adapter_by_node(ddc_node);
+ of_node_put(ddc_node);
+ if (!hdmi->ddc) {
+ DRM_DEBUG("Failed to get ddc i2c adapter by node\n");
+ return -EPROBE_DEFER;
+ }
+
+ /* This is the rate that is set by the firmware. The number
+ * needs to be a bit higher than the pixel clock rate
+ * (generally 148.5MHz).
+ */
+ ret = clk_set_rate(hdmi->hsm_clock, HSM_CLOCK_FREQ);
+ if (ret) {
+ DRM_ERROR("Failed to set HSM clock rate: %d\n", ret);
+ goto err_put_i2c;
+ }
+
+ ret = clk_prepare_enable(hdmi->hsm_clock);
+ if (ret) {
+ DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n",
+ ret);
+ goto err_put_i2c;
+ }
+
+ /* Only use the GPIO HPD pin if present in the DT, otherwise
+ * we'll use the HDMI core's register.
+ */
+ if (of_find_property(dev->of_node, "hpd-gpios", &value)) {
+ enum of_gpio_flags hpd_gpio_flags;
+
+ hdmi->hpd_gpio = of_get_named_gpio_flags(dev->of_node,
+ "hpd-gpios", 0,
+ &hpd_gpio_flags);
+ if (hdmi->hpd_gpio < 0) {
+ ret = hdmi->hpd_gpio;
+ goto err_unprepare_hsm;
+ }
+
+ hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW;
+ }
+
+ vc4->hdmi = hdmi;
+
+ /* HDMI core must be enabled. */
+ if (!(HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE)) {
+ HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST);
+ udelay(1);
+ HD_WRITE(VC4_HD_M_CTL, 0);
+
+ HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE);
+ }
+ pm_runtime_enable(dev);
+
+ drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ drm_encoder_helper_add(hdmi->encoder, &vc4_hdmi_encoder_helper_funcs);
+
+ hdmi->connector = vc4_hdmi_connector_init(drm, hdmi->encoder);
+ if (IS_ERR(hdmi->connector)) {
+ ret = PTR_ERR(hdmi->connector);
+ goto err_destroy_encoder;
+ }
+#ifdef CONFIG_DRM_VC4_HDMI_CEC
+ hdmi->cec_adap = cec_allocate_adapter(&vc4_hdmi_cec_adap_ops,
+ vc4, "vc4",
+ CEC_CAP_TRANSMIT |
+ CEC_CAP_LOG_ADDRS |
+ CEC_CAP_PASSTHROUGH |
+ CEC_CAP_RC, 1);
+ ret = PTR_ERR_OR_ZERO(hdmi->cec_adap);
+ if (ret < 0)
+ goto err_destroy_conn;
+ HDMI_WRITE(VC4_HDMI_CPU_MASK_SET, 0xffffffff);
+ value = HDMI_READ(VC4_HDMI_CEC_CNTRL_1);
+ value &= ~VC4_HDMI_CEC_DIV_CLK_CNT_MASK;
+ /*
+ * Set the logical address to Unregistered and set the clock
+ * divider: the hsm_clock rate and this divider setting will
+ * give a 40 kHz CEC clock.
+ */
+ value |= VC4_HDMI_CEC_ADDR_MASK |
+ (4091 << VC4_HDMI_CEC_DIV_CLK_CNT_SHIFT);
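+ /* Given the ~162 MHz maximum pixel clock and 1% margin described in
+ * vc4_hdmi_encoder_mode_valid(), HSM_CLOCK_FREQ is roughly 163.7 MHz,
+ * and dividing that by 4091 lands close to the 40 kHz target.
+ */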
+ HDMI_WRITE(VC4_HDMI_CEC_CNTRL_1, value);
+ ret = devm_request_threaded_irq(dev, platform_get_irq(pdev, 0),
+ vc4_cec_irq_handler,
+ vc4_cec_irq_handler_thread, 0,
+ "vc4 hdmi cec", vc4);
+ if (ret)
+ goto err_delete_cec_adap;
+ ret = cec_register_adapter(hdmi->cec_adap, dev);
+ if (ret < 0)
+ goto err_delete_cec_adap;
+#endif
+
+ ret = vc4_hdmi_audio_init(hdmi);
+ if (ret)
+ goto err_destroy_encoder;
+
+ return 0;
+
+#ifdef CONFIG_DRM_VC4_HDMI_CEC
+err_delete_cec_adap:
+ cec_delete_adapter(hdmi->cec_adap);
+err_destroy_conn:
+ vc4_hdmi_connector_destroy(hdmi->connector);
+#endif
+err_destroy_encoder:
+ vc4_hdmi_encoder_destroy(hdmi->encoder);
+err_unprepare_hsm:
+ clk_disable_unprepare(hdmi->hsm_clock);
+ pm_runtime_disable(dev);
+err_put_i2c:
+ put_device(&hdmi->ddc->dev);
+
+ return ret;
+}
+
+static void vc4_hdmi_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = drm->dev_private;
+ struct vc4_hdmi *hdmi = vc4->hdmi;
+
+ cec_unregister_adapter(hdmi->cec_adap);
+ vc4_hdmi_connector_destroy(hdmi->connector);
+ vc4_hdmi_encoder_destroy(hdmi->encoder);
+
+ clk_disable_unprepare(hdmi->hsm_clock);
+ pm_runtime_disable(dev);
+
+ put_device(&hdmi->ddc->dev);
+
+ vc4->hdmi = NULL;
+}
+
+static const struct component_ops vc4_hdmi_ops = {
+ .bind = vc4_hdmi_bind,
+ .unbind = vc4_hdmi_unbind,
+};
+
+static int vc4_hdmi_dev_probe(struct platform_device *pdev)
+{
+ return component_add(&pdev->dev, &vc4_hdmi_ops);
+}
+
+static int vc4_hdmi_dev_remove(struct platform_device *pdev)
+{
+ component_del(&pdev->dev, &vc4_hdmi_ops);
+ return 0;
+}
+
+static const struct of_device_id vc4_hdmi_dt_match[] = {
+ { .compatible = "brcm,bcm2835-hdmi" },
+ {}
+};
+
+struct platform_driver vc4_hdmi_driver = {
+ .probe = vc4_hdmi_dev_probe,
+ .remove = vc4_hdmi_dev_remove,
+ .driver = {
+ .name = "vc4_hdmi",
+ .of_match_table = vc4_hdmi_dt_match,
+ },
+};
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
new file mode 100644
index 000000000..5d8c749c9
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**
+ * DOC: VC4 HVS module.
+ *
+ * The Hardware Video Scaler (HVS) is the piece of hardware that does
+ * translation, scaling, colorspace conversion, and compositing of
+ * pixels stored in framebuffers into a FIFO of pixels going out to
+ * the Pixel Valve (CRTC). It operates at the system clock rate (the
+ * system audio clock gate, specifically), which is much higher than
+ * the pixel clock rate.
+ *
+ * There is a single global HVS, with multiple output FIFOs that can
+ * be consumed by the PVs. This file just manages the resources for
+ * the HVS, while the vc4_crtc.c code actually drives HVS setup for
+ * each CRTC.
+ */
+
+#include <linux/component.h>
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define HVS_REG(reg) { reg, #reg }
+static const struct {
+ u32 reg;
+ const char *name;
+} hvs_regs[] = {
+ HVS_REG(SCALER_DISPCTRL),
+ HVS_REG(SCALER_DISPSTAT),
+ HVS_REG(SCALER_DISPID),
+ HVS_REG(SCALER_DISPECTRL),
+ HVS_REG(SCALER_DISPPROF),
+ HVS_REG(SCALER_DISPDITHER),
+ HVS_REG(SCALER_DISPEOLN),
+ HVS_REG(SCALER_DISPLIST0),
+ HVS_REG(SCALER_DISPLIST1),
+ HVS_REG(SCALER_DISPLIST2),
+ HVS_REG(SCALER_DISPLSTAT),
+ HVS_REG(SCALER_DISPLACT0),
+ HVS_REG(SCALER_DISPLACT1),
+ HVS_REG(SCALER_DISPLACT2),
+ HVS_REG(SCALER_DISPCTRL0),
+ HVS_REG(SCALER_DISPBKGND0),
+ HVS_REG(SCALER_DISPSTAT0),
+ HVS_REG(SCALER_DISPBASE0),
+ HVS_REG(SCALER_DISPCTRL1),
+ HVS_REG(SCALER_DISPBKGND1),
+ HVS_REG(SCALER_DISPSTAT1),
+ HVS_REG(SCALER_DISPBASE1),
+ HVS_REG(SCALER_DISPCTRL2),
+ HVS_REG(SCALER_DISPBKGND2),
+ HVS_REG(SCALER_DISPSTAT2),
+ HVS_REG(SCALER_DISPBASE2),
+ HVS_REG(SCALER_DISPALPHA2),
+ HVS_REG(SCALER_OLEDOFFS),
+ HVS_REG(SCALER_OLEDCOEF0),
+ HVS_REG(SCALER_OLEDCOEF1),
+ HVS_REG(SCALER_OLEDCOEF2),
+};
+
+void vc4_hvs_dump_state(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hvs_regs); i++) {
+ DRM_INFO("0x%04x (%s): 0x%08x\n",
+ hvs_regs[i].reg, hvs_regs[i].name,
+ HVS_READ(hvs_regs[i].reg));
+ }
+
+ DRM_INFO("HVS ctx:\n");
+ for (i = 0; i < 64; i += 4) {
+ DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i * 4, i < HVS_BOOTLOADER_DLIST_END ? "B" : "D",
+ readl((u32 __iomem *)vc4->hvs->dlist + i + 0),
+ readl((u32 __iomem *)vc4->hvs->dlist + i + 1),
+ readl((u32 __iomem *)vc4->hvs->dlist + i + 2),
+ readl((u32 __iomem *)vc4->hvs->dlist + i + 3));
+ }
+}
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hvs_regs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ hvs_regs[i].name, hvs_regs[i].reg,
+ HVS_READ(hvs_regs[i].reg));
+ }
+
+ return 0;
+}
+#endif
+
+/* The filter kernel is composed of dwords each containing 3 9-bit
+ * signed integers packed next to each other.
+ */
+#define VC4_INT_TO_COEFF(coeff) ((coeff) & 0x1ff)
+#define VC4_PPF_FILTER_WORD(c0, c1, c2) \
+ ((((c0) & 0x1ff) << 0) | \
+ (((c1) & 0x1ff) << 9) | \
+ (((c2) & 0x1ff) << 18))
+
+/* The whole filter kernel is arranged as the coefficients 0-16 going
+ * up, then a pad, then 17-31 going down and reversed within the
+ * dwords. This means that a linear phase kernel (where it's
+ * symmetrical at the boundary between 15 and 16) has the last 5
+ * dwords matching the first 5, but reversed.
+ */
+#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8, \
+ c9, c10, c11, c12, c13, c14, c15) \
+ {VC4_PPF_FILTER_WORD(c0, c1, c2), \
+ VC4_PPF_FILTER_WORD(c3, c4, c5), \
+ VC4_PPF_FILTER_WORD(c6, c7, c8), \
+ VC4_PPF_FILTER_WORD(c9, c10, c11), \
+ VC4_PPF_FILTER_WORD(c12, c13, c14), \
+ VC4_PPF_FILTER_WORD(c15, c15, 0)}
+
+#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6
+#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1)
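+/* 11 dwords in total: the 6 ascending dwords followed by the 5 mirrored
+ * ones, since the centre dword is only written once.
+ */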
+
+/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
+ * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
+ */
+static const u32 mitchell_netravali_1_3_1_3_kernel[] =
+ VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
+ 50, 82, 119, 155, 187, 213, 227);
+
+static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
+ struct drm_mm_node *space,
+ const u32 *kernel)
+{
+ int ret, i;
+ u32 __iomem *dst_kernel;
+
+ ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS);
+ if (ret) {
+ DRM_ERROR("Failed to allocate space for filter kernel: %d\n",
+ ret);
+ return ret;
+ }
+
+ dst_kernel = hvs->dlist + space->start;
+
+ for (i = 0; i < VC4_KERNEL_DWORDS; i++) {
+ if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS)
+ writel(kernel[i], &dst_kernel[i]);
+ else {
+ writel(kernel[VC4_KERNEL_DWORDS - i - 1],
+ &dst_kernel[i]);
+ }
+ }
+
+ return 0;
+}
+
+static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = drm->dev_private;
+ struct vc4_hvs *hvs = NULL;
+ int ret;
+ u32 dispctrl;
+
+ hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL);
+ if (!hvs)
+ return -ENOMEM;
+
+ hvs->pdev = pdev;
+
+ hvs->regs = vc4_ioremap_regs(pdev, 0);
+ if (IS_ERR(hvs->regs))
+ return PTR_ERR(hvs->regs);
+
+ hvs->dlist = hvs->regs + SCALER_DLIST_START;
+
+ spin_lock_init(&hvs->mm_lock);
+
+ /* Set up the HVS display list memory manager. We never
+ * overwrite the setup from the bootloader (just 128b out of
+ * our 16K), since we don't want to scramble the screen when
+ * transitioning from the firmware's boot setup to runtime.
+ */
+ drm_mm_init(&hvs->dlist_mm,
+ HVS_BOOTLOADER_DLIST_END,
+ (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
+
+ /* Set up the HVS LBM memory manager. We could have some more
+ * complicated data structure that allowed reuse of LBM areas
+ * between planes when they don't overlap on the screen, but
+ * for now we just allocate globally.
+ */
+ drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024);
+
+ /* Upload filter kernels. We only have the one for now, so we
+ * keep it around for the lifetime of the driver.
+ */
+ ret = vc4_hvs_upload_linear_kernel(hvs,
+ &hvs->mitchell_netravali_filter,
+ mitchell_netravali_1_3_1_3_kernel);
+ if (ret)
+ return ret;
+
+ vc4->hvs = hvs;
+
+ dispctrl = HVS_READ(SCALER_DISPCTRL);
+
+ dispctrl |= SCALER_DISPCTRL_ENABLE;
+
+ /* Set DSP3 (PV1) to use HVS channel 2, which would otherwise
+ * be unused.
+ */
+ dispctrl &= ~SCALER_DISPCTRL_DSP3_MUX_MASK;
+ dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX);
+
+ HVS_WRITE(SCALER_DISPCTRL, dispctrl);
+
+ return 0;
+}
+
+static void vc4_hvs_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = drm->dev_private;
+
+ if (vc4->hvs->mitchell_netravali_filter.allocated)
+ drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
+
+ drm_mm_takedown(&vc4->hvs->dlist_mm);
+ drm_mm_takedown(&vc4->hvs->lbm_mm);
+
+ vc4->hvs = NULL;
+}
+
+static const struct component_ops vc4_hvs_ops = {
+ .bind = vc4_hvs_bind,
+ .unbind = vc4_hvs_unbind,
+};
+
+static int vc4_hvs_dev_probe(struct platform_device *pdev)
+{
+ return component_add(&pdev->dev, &vc4_hvs_ops);
+}
+
+static int vc4_hvs_dev_remove(struct platform_device *pdev)
+{
+ component_del(&pdev->dev, &vc4_hvs_ops);
+ return 0;
+}
+
+static const struct of_device_id vc4_hvs_dt_match[] = {
+ { .compatible = "brcm,bcm2835-hvs" },
+ {}
+};
+
+struct platform_driver vc4_hvs_driver = {
+ .probe = vc4_hvs_dev_probe,
+ .remove = vc4_hvs_dev_remove,
+ .driver = {
+ .name = "vc4_hvs",
+ .of_match_table = vc4_hvs_dt_match,
+ },
+};
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
new file mode 100644
index 000000000..4cd2ccfe1
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Interrupt management for the V3D engine
+ *
+ * We have an interrupt status register (V3D_INTCTL) which reports
+ * interrupts, and where writing 1 bits clears those interrupts.
+ * There are also a pair of interrupt registers
+ * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
+ * disables that specific interrupt, and 0s written are ignored
+ * (reading either one returns the set of enabled interrupts).
+ *
+ * When we take a binning flush done interrupt, we need to submit the
+ * next frame for binning and move the finished frame to the render
+ * thread.
+ *
+ * When we take a render frame interrupt, we need to wake the
+ * processes waiting for some frame to be done, and get the next frame
+ * submitted ASAP (so the hardware doesn't sit idle when there's work
+ * to do).
+ *
+ * When we take the binner out of memory interrupt, we need to
+ * allocate some new memory and pass it to the binner so that the
+ * current job can make progress.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
+ V3D_INT_FLDONE | \
+ V3D_INT_FRDONE)
+
+DECLARE_WAIT_QUEUE_HEAD(render_wait);
+
+static void
+vc4_overflow_mem_work(struct work_struct *work)
+{
+ struct vc4_dev *vc4 =
+ container_of(work, struct vc4_dev, overflow_mem_work);
+ struct vc4_bo *bo = vc4->bin_bo;
+ int bin_bo_slot;
+ struct vc4_exec_info *exec;
+ unsigned long irqflags;
+
+ bin_bo_slot = vc4_v3d_get_bin_slot(vc4);
+ if (bin_bo_slot < 0) {
+ DRM_ERROR("Couldn't allocate binner overflow mem\n");
+ return;
+ }
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+
+ if (vc4->bin_alloc_overflow) {
+ /* If we had overflow memory allocated previously,
+ * then that chunk will free when the current bin job
+ * is done. If we don't have a bin job running, then
+ * the chunk will be done whenever the list of render
+ * jobs has drained.
+ */
+ exec = vc4_first_bin_job(vc4);
+ if (!exec)
+ exec = vc4_last_render_job(vc4);
+ if (exec) {
+ exec->bin_slots |= vc4->bin_alloc_overflow;
+ } else {
+ /* There's nothing queued in the hardware, so
+ * the old slot is free immediately.
+ */
+ vc4->bin_alloc_used &= ~vc4->bin_alloc_overflow;
+ }
+ }
+ vc4->bin_alloc_overflow = BIT(bin_bo_slot);
+
+ V3D_WRITE(V3D_BPOA, bo->base.paddr + bin_bo_slot * vc4->bin_alloc_size);
+ V3D_WRITE(V3D_BPOS, bo->base.base.size);
+ V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
+ V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+static void
+vc4_irq_finish_bin_job(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_exec_info *next, *exec = vc4_first_bin_job(vc4);
+
+ if (!exec)
+ return;
+
+ vc4_move_job_to_render(dev, exec);
+ next = vc4_first_bin_job(vc4);
+
+ /* Only submit the next job in the bin list if it matches the perfmon
+ * attached to the one that just finished (or if both jobs don't have
+ * perfmon attached to them).
+ */
+ if (next && next->perfmon == exec->perfmon)
+ vc4_submit_next_bin_job(dev);
+}
+
+static void
+vc4_cancel_bin_job(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
+
+ if (!exec)
+ return;
+
+ /* Stop the perfmon so that the next bin job can be started. */
+ if (exec->perfmon)
+ vc4_perfmon_stop(vc4, exec->perfmon, false);
+
+ list_move_tail(&exec->head, &vc4->bin_job_list);
+ vc4_submit_next_bin_job(dev);
+}
+
+static void
+vc4_irq_finish_render_job(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_exec_info *exec = vc4_first_render_job(vc4);
+ struct vc4_exec_info *nextbin, *nextrender;
+
+ if (!exec)
+ return;
+
+ vc4->finished_seqno++;
+ list_move_tail(&exec->head, &vc4->job_done_list);
+
+ nextbin = vc4_first_bin_job(vc4);
+ nextrender = vc4_first_render_job(vc4);
+
+ /* Only stop the perfmon if following jobs in the queue don't expect it
+ * to be enabled.
+ */
+ if (exec->perfmon && !nextrender &&
+ (!nextbin || nextbin->perfmon != exec->perfmon))
+ vc4_perfmon_stop(vc4, exec->perfmon, true);
+
+ /* If there's a render job waiting, start it. If this is not the case
+ * we may have to unblock the binner if it's been stalled because of
+ * perfmon (this can be checked by comparing the perfmon attached to
+ * the finished renderjob to the one attached to the next bin job: if
+ * they don't match, this means the binner is stalled and should be
+ * restarted).
+ */
+ if (nextrender)
+ vc4_submit_next_render_job(dev);
+ else if (nextbin && nextbin->perfmon != exec->perfmon)
+ vc4_submit_next_bin_job(dev);
+
+ if (exec->fence) {
+ dma_fence_signal_locked(exec->fence);
+ dma_fence_put(exec->fence);
+ exec->fence = NULL;
+ }
+
+ wake_up_all(&vc4->job_wait_queue);
+ schedule_work(&vc4->job_done_work);
+}
+
+irqreturn_t
+vc4_irq(int irq, void *arg)
+{
+ struct drm_device *dev = arg;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ uint32_t intctl;
+ irqreturn_t status = IRQ_NONE;
+
+ barrier();
+ intctl = V3D_READ(V3D_INTCTL);
+
+ /* Acknowledge the interrupts we're handling here. The binner
+ * last flush / render frame done interrupt will be cleared,
+ * while OUTOMEM will stay high until the underlying cause is
+ * cleared.
+ */
+ V3D_WRITE(V3D_INTCTL, intctl);
+
+ if (intctl & V3D_INT_OUTOMEM) {
+ /* Disable OUTOMEM until the work is done. */
+ V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
+ schedule_work(&vc4->overflow_mem_work);
+ status = IRQ_HANDLED;
+ }
+
+ if (intctl & V3D_INT_FLDONE) {
+ spin_lock(&vc4->job_lock);
+ vc4_irq_finish_bin_job(dev);
+ spin_unlock(&vc4->job_lock);
+ status = IRQ_HANDLED;
+ }
+
+ if (intctl & V3D_INT_FRDONE) {
+ spin_lock(&vc4->job_lock);
+ vc4_irq_finish_render_job(dev);
+ spin_unlock(&vc4->job_lock);
+ status = IRQ_HANDLED;
+ }
+
+ return status;
+}
+
+void
+vc4_irq_preinstall(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ init_waitqueue_head(&vc4->job_wait_queue);
+ INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
+
+ /* Clear any pending interrupts someone might have left around
+ * for us.
+ */
+ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+}
+
+int
+vc4_irq_postinstall(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ /* Enable both the render done and out of memory interrupts. */
+ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+ return 0;
+}
+
+void
+vc4_irq_uninstall(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ /* Disable sending interrupts for our driver's IRQs. */
+ V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
+
+ /* Clear any pending interrupts we might have left. */
+ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+ /* Finish any interrupt handler still in flight. */
+ disable_irq(dev->irq);
+
+ cancel_work_sync(&vc4->overflow_mem_work);
+}
+
+/** Reinitializes interrupt registers when a GPU reset is performed. */
+void vc4_irq_reset(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ unsigned long irqflags;
+
+ /* Acknowledge any stale IRQs. */
+ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+ /*
+ * Turn all our interrupts on. Binner out of memory is the
+ * only one we expect to trigger at this point, since we've
+ * just come from poweron and haven't supplied any overflow
+ * memory yet.
+ */
+ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ vc4_cancel_bin_job(dev);
+ vc4_irq_finish_render_job(dev);
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
new file mode 100644
index 000000000..f4d8a730e
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**
+ * DOC: VC4 KMS
+ *
+ * This is the general code for implementing KMS mode setting that
+ * doesn't clearly associate with any of the other objects (plane,
+ * crtc, HDMI encoder).
+ */
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+struct vc4_ctm_state {
+ struct drm_private_state base;
+ struct drm_color_ctm *ctm;
+ int fifo;
+};
+
+static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv)
+{
+ return container_of(priv, struct vc4_ctm_state, base);
+}
+
+static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state,
+ struct drm_private_obj *manager)
+{
+ struct drm_device *dev = state->dev;
+ struct vc4_dev *vc4 = dev->dev_private;
+ struct drm_private_state *priv_state;
+ int ret;
+
+ ret = drm_modeset_lock(&vc4->ctm_state_lock, state->acquire_ctx);
+ if (ret)
+ return ERR_PTR(ret);
+
+ priv_state = drm_atomic_get_private_obj_state(state, manager);
+ if (IS_ERR(priv_state))
+ return ERR_CAST(priv_state);
+
+ return to_vc4_ctm_state(priv_state);
+}
+
+static struct drm_private_state *
+vc4_ctm_duplicate_state(struct drm_private_obj *obj)
+{
+ struct vc4_ctm_state *state;
+
+ state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return NULL;
+
+ __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+ return &state->base;
+}
+
+static void vc4_ctm_destroy_state(struct drm_private_obj *obj,
+ struct drm_private_state *state)
+{
+ struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(state);
+
+ kfree(ctm_state);
+}
+
+static const struct drm_private_state_funcs vc4_ctm_state_funcs = {
+ .atomic_duplicate_state = vc4_ctm_duplicate_state,
+ .atomic_destroy_state = vc4_ctm_destroy_state,
+};
+
+/* Converts a DRM S31.32 value to the HW S0.9 format. */
+static u16 vc4_ctm_s31_32_to_s0_9(u64 in)
+{
+ u16 r;
+
+ /* Sign bit. */
+ r = in & BIT_ULL(63) ? BIT(9) : 0;
+
+ if ((in & GENMASK_ULL(62, 32)) > 0) {
+ /* We have zero integer bits so we can only saturate here. */
+ r |= GENMASK(8, 0);
+ } else {
+ /* Otherwise take the 9 most significant fractional bits. */
+ r |= (in >> 23) & GENMASK(8, 0);
+ }
+
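+ /* e.g. an S31.32 value of 0.5 (fractional bit 31 set) maps to 0x100
+ * here, i.e. 256/512 = 0.5 in S0.9.
+ */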
+ return r;
+}
+
+static void
+vc4_ctm_commit(struct vc4_dev *vc4, struct drm_atomic_state *state)
+{
+ struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(vc4->ctm_manager.state);
+ struct drm_color_ctm *ctm = ctm_state->ctm;
+
+ if (ctm_state->fifo) {
+ HVS_WRITE(SCALER_OLEDCOEF2,
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[0]),
+ SCALER_OLEDCOEF2_R_TO_R) |
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[3]),
+ SCALER_OLEDCOEF2_R_TO_G) |
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[6]),
+ SCALER_OLEDCOEF2_R_TO_B));
+ HVS_WRITE(SCALER_OLEDCOEF1,
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[1]),
+ SCALER_OLEDCOEF1_G_TO_R) |
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[4]),
+ SCALER_OLEDCOEF1_G_TO_G) |
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[7]),
+ SCALER_OLEDCOEF1_G_TO_B));
+ HVS_WRITE(SCALER_OLEDCOEF0,
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[2]),
+ SCALER_OLEDCOEF0_B_TO_R) |
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[5]),
+ SCALER_OLEDCOEF0_B_TO_G) |
+ VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[8]),
+ SCALER_OLEDCOEF0_B_TO_B));
+ }
+
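+ /* A fifo value of 0 leaves the CTM disabled; non-zero values are the
+ * 1-based FIFO selected in vc4_ctm_atomic_check().
+ */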
+ HVS_WRITE(SCALER_OLEDOFFS,
+ VC4_SET_FIELD(ctm_state->fifo, SCALER_OLEDOFFS_DISPFIFO));
+}
+
+static void
+vc4_atomic_complete_commit(struct drm_atomic_state *state)
+{
+ struct drm_device *dev = state->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ drm_atomic_helper_wait_for_fences(dev, state, false);
+
+ drm_atomic_helper_wait_for_dependencies(state);
+
+ drm_atomic_helper_commit_modeset_disables(dev, state);
+
+ vc4_ctm_commit(vc4, state);
+
+ drm_atomic_helper_commit_planes(dev, state, 0);
+
+ drm_atomic_helper_commit_modeset_enables(dev, state);
+
+ drm_atomic_helper_fake_vblank(state);
+
+ drm_atomic_helper_commit_hw_done(state);
+
+ drm_atomic_helper_wait_for_flip_done(dev, state);
+
+ drm_atomic_helper_cleanup_planes(dev, state);
+
+ drm_atomic_helper_commit_cleanup_done(state);
+
+ drm_atomic_state_put(state);
+
+ up(&vc4->async_modeset);
+}
+
+static void commit_work(struct work_struct *work)
+{
+ struct drm_atomic_state *state = container_of(work,
+ struct drm_atomic_state,
+ commit_work);
+ vc4_atomic_complete_commit(state);
+}
+
+/**
+ * vc4_atomic_commit - commit validated state object
+ * @dev: DRM device
+ * @state: the driver state object
+ * @nonblock: nonblocking commit
+ *
+ * This function commits a state object that has been pre-validated with
+ * drm_atomic_helper_check(). This can still fail when e.g. the framebuffer
+ * reservation fails. For
+ * now this doesn't implement asynchronous commits.
+ *
+ * RETURNS
+ * Zero for success or -errno.
+ */
+static int vc4_atomic_commit(struct drm_device *dev,
+ struct drm_atomic_state *state,
+ bool nonblock)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int ret;
+
+ if (state->async_update) {
+ ret = down_interruptible(&vc4->async_modeset);
+ if (ret)
+ return ret;
+
+ ret = drm_atomic_helper_prepare_planes(dev, state);
+ if (ret) {
+ up(&vc4->async_modeset);
+ return ret;
+ }
+
+ drm_atomic_helper_async_commit(dev, state);
+
+ drm_atomic_helper_cleanup_planes(dev, state);
+
+ up(&vc4->async_modeset);
+
+ return 0;
+ }
+
+ /* We know for sure we don't want an async update here. Set
+ * state->legacy_cursor_update to false to prevent
+ * drm_atomic_helper_setup_commit() from auto-completing
+ * commit->flip_done.
+ */
+ state->legacy_cursor_update = false;
+ ret = drm_atomic_helper_setup_commit(state, nonblock);
+ if (ret)
+ return ret;
+
+ INIT_WORK(&state->commit_work, commit_work);
+
+ ret = down_interruptible(&vc4->async_modeset);
+ if (ret)
+ return ret;
+
+ ret = drm_atomic_helper_prepare_planes(dev, state);
+ if (ret) {
+ up(&vc4->async_modeset);
+ return ret;
+ }
+
+ if (!nonblock) {
+ ret = drm_atomic_helper_wait_for_fences(dev, state, true);
+ if (ret) {
+ drm_atomic_helper_cleanup_planes(dev, state);
+ up(&vc4->async_modeset);
+ return ret;
+ }
+ }
+
+ /*
+ * This is the point of no return - everything below never fails except
+ * when the hw goes bonghits. Which means we can commit the new state on
+ * the software side now.
+ */
+
+ BUG_ON(drm_atomic_helper_swap_state(state, false) < 0);
+
+ /*
+ * Everything below can be run asynchronously without the need to grab
+ * any modeset locks at all under one condition: It must be guaranteed
+ * that the asynchronous work has either been cancelled (if the driver
+ * supports it, which at least requires that the framebuffers get
+ * cleaned up with drm_atomic_helper_cleanup_planes()) or completed
+ * before the new state gets committed on the software side with
+ * drm_atomic_helper_swap_state().
+ *
+ * This scheme allows new atomic state updates to be prepared and
+ * checked in parallel to the asynchronous completion of the previous
+ * update. Which is important since compositors need to figure out the
+ * composition of the next frame right after having submitted the
+ * current layout.
+ */
+
+ drm_atomic_state_get(state);
+ if (nonblock)
+ queue_work(system_unbound_wq, &state->commit_work);
+ else
+ vc4_atomic_complete_commit(state);
+
+ return 0;
+}
+
+static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct drm_mode_fb_cmd2 mode_cmd_local;
+
+ /* If the user didn't specify a modifier, use the
+ * vc4_set_tiling_ioctl() state for the BO.
+ */
+ if (!(mode_cmd->flags & DRM_MODE_FB_MODIFIERS)) {
+ struct drm_gem_object *gem_obj;
+ struct vc4_bo *bo;
+
+ gem_obj = drm_gem_object_lookup(file_priv,
+ mode_cmd->handles[0]);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n",
+ mode_cmd->handles[0]);
+ return ERR_PTR(-ENOENT);
+ }
+ bo = to_vc4_bo(gem_obj);
+
+ mode_cmd_local = *mode_cmd;
+
+ if (bo->t_format) {
+ mode_cmd_local.modifier[0] =
+ DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED;
+ } else {
+ mode_cmd_local.modifier[0] = DRM_FORMAT_MOD_NONE;
+ }
+
+ drm_gem_object_put_unlocked(gem_obj);
+
+ mode_cmd = &mode_cmd_local;
+ }
+
+ return drm_gem_fb_create(dev, file_priv, mode_cmd);
+}
+
+/* Our CTM has some peculiar limitations: we can only enable it for one CRTC
+ * at a time and the HW only supports S0.9 scalars. To account for the latter,
+ * we don't allow userland to set a CTM that we have no hope of approximating.
+ */
+static int
+vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_ctm_state *ctm_state = NULL;
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct drm_color_ctm *ctm;
+ int i;
+
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ /* CTM is being disabled. */
+ if (!new_crtc_state->ctm && old_crtc_state->ctm) {
+ ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager);
+ if (IS_ERR(ctm_state))
+ return PTR_ERR(ctm_state);
+ ctm_state->fifo = 0;
+ }
+ }
+
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ if (new_crtc_state->ctm == old_crtc_state->ctm)
+ continue;
+
+ if (!ctm_state) {
+ ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager);
+ if (IS_ERR(ctm_state))
+ return PTR_ERR(ctm_state);
+ }
+
+ /* CTM is being enabled or the matrix changed. */
+ if (new_crtc_state->ctm) {
+ /* fifo is 1-based since 0 disables CTM. */
+ int fifo = to_vc4_crtc(crtc)->channel + 1;
+
+ /* Check userland isn't trying to turn on CTM for more
+ * than one CRTC at a time.
+ */
+ if (ctm_state->fifo && ctm_state->fifo != fifo) {
+ DRM_DEBUG_DRIVER("Too many CTM configured\n");
+ return -EINVAL;
+ }
+
+ /* Check we can approximate the specified CTM.
+ * We disallow scalars |c| > 1.0 since the HW has
+ * no integer bits.
+ */
+ ctm = new_crtc_state->ctm->data;
+ for (i = 0; i < ARRAY_SIZE(ctm->matrix); i++) {
+ u64 val = ctm->matrix[i];
+
+ val &= ~BIT_ULL(63);
+ if (val > BIT_ULL(32))
+ return -EINVAL;
+ }
+
+ ctm_state->fifo = fifo;
+ ctm_state->ctm = ctm;
+ }
+ }
+
+ return 0;
+}
+
+static int
+vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
+{
+ int ret;
+
+ ret = vc4_ctm_atomic_check(dev, state);
+ if (ret < 0)
+ return ret;
+
+ return drm_atomic_helper_check(dev, state);
+}
+
+static const struct drm_mode_config_funcs vc4_mode_funcs = {
+ .output_poll_changed = drm_fb_helper_output_poll_changed,
+ .atomic_check = vc4_atomic_check,
+ .atomic_commit = vc4_atomic_commit,
+ .fb_create = vc4_fb_create,
+};
+
+int vc4_kms_load(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_ctm_state *ctm_state;
+ int ret;
+
+ sema_init(&vc4->async_modeset, 1);
+
+ /* Set support for vblank irq fast disable, before drm_vblank_init() */
+ dev->vblank_disable_immediate = true;
+
+ ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to initialize vblank\n");
+ return ret;
+ }
+
+ dev->mode_config.max_width = 2048;
+ dev->mode_config.max_height = 2048;
+ dev->mode_config.funcs = &vc4_mode_funcs;
+ dev->mode_config.preferred_depth = 24;
+ dev->mode_config.async_page_flip = true;
+ dev->mode_config.allow_fb_modifiers = true;
+
+ drm_modeset_lock_init(&vc4->ctm_state_lock);
+
+ ctm_state = kzalloc(sizeof(*ctm_state), GFP_KERNEL);
+ if (!ctm_state)
+ return -ENOMEM;
+ drm_atomic_private_obj_init(&vc4->ctm_manager, &ctm_state->base,
+ &vc4_ctm_state_funcs);
+
+ drm_mode_config_reset(dev);
+
+ if (dev->mode_config.num_connector)
+ drm_fb_cma_fbdev_init(dev, 32, 0);
+
+ drm_kms_helper_poll_init(dev);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_packet.h b/drivers/gpu/drm/vc4/vc4_packet.h
new file mode 100644
index 000000000..0f31cc065
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_packet.h
@@ -0,0 +1,399 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_PACKET_H
+#define VC4_PACKET_H
+
+#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
+
+enum vc4_packet {
+ VC4_PACKET_HALT = 0,
+ VC4_PACKET_NOP = 1,
+
+ VC4_PACKET_FLUSH = 4,
+ VC4_PACKET_FLUSH_ALL = 5,
+ VC4_PACKET_START_TILE_BINNING = 6,
+ VC4_PACKET_INCREMENT_SEMAPHORE = 7,
+ VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
+
+ VC4_PACKET_BRANCH = 16,
+ VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
+
+ VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
+ VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
+ VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
+ VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
+ VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
+ VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
+
+ VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
+ VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
+
+ VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
+ VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
+
+ VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
+
+ VC4_PACKET_GL_SHADER_STATE = 64,
+ VC4_PACKET_NV_SHADER_STATE = 65,
+ VC4_PACKET_VG_SHADER_STATE = 66,
+
+ VC4_PACKET_CONFIGURATION_BITS = 96,
+ VC4_PACKET_FLAT_SHADE_FLAGS = 97,
+ VC4_PACKET_POINT_SIZE = 98,
+ VC4_PACKET_LINE_WIDTH = 99,
+ VC4_PACKET_RHT_X_BOUNDARY = 100,
+ VC4_PACKET_DEPTH_OFFSET = 101,
+ VC4_PACKET_CLIP_WINDOW = 102,
+ VC4_PACKET_VIEWPORT_OFFSET = 103,
+ VC4_PACKET_Z_CLIPPING = 104,
+ VC4_PACKET_CLIPPER_XY_SCALING = 105,
+ VC4_PACKET_CLIPPER_Z_SCALING = 106,
+
+ VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
+ VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
+ VC4_PACKET_CLEAR_COLORS = 114,
+ VC4_PACKET_TILE_COORDINATES = 115,
+
+ /* Not an actual hardware packet -- this is what we use to put
+	 * references to GEM BOs in the command stream, since we need the u32
+	 * in the actual address packet in order to store the offset from the
+ * start of the BO.
+ */
+ VC4_PACKET_GEM_HANDLES = 254,
+} __attribute__ ((__packed__));
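+/* Editor's note: the packed attribute keeps this enum one byte wide,
+ * matching the single-byte opcode that begins each command list packet.
+ */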
+
+#define VC4_PACKET_HALT_SIZE 1
+#define VC4_PACKET_NOP_SIZE 1
+#define VC4_PACKET_FLUSH_SIZE 1
+#define VC4_PACKET_FLUSH_ALL_SIZE 1
+#define VC4_PACKET_START_TILE_BINNING_SIZE 1
+#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
+#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
+#define VC4_PACKET_BRANCH_SIZE 5
+#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5
+#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
+#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
+#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
+#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1
+#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
+#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
+#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE 5
+#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
+#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
+#define VC4_PACKET_POINT_SIZE_SIZE 5
+#define VC4_PACKET_LINE_WIDTH_SIZE 5
+#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3
+#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
+#define VC4_PACKET_CLIP_WINDOW_SIZE 9
+#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
+#define VC4_PACKET_Z_CLIPPING_SIZE 9
+#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
+#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
+#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
+#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11
+#define VC4_PACKET_CLEAR_COLORS_SIZE 14
+#define VC4_PACKET_TILE_COORDINATES_SIZE 3
+#define VC4_PACKET_GEM_HANDLES_SIZE 9
+
+/* Number of multisamples supported. */
+#define VC4_MAX_SAMPLES 4
+/* Size of a full resolution color or Z tile buffer load/store. */
+#define VC4_TILE_BUFFER_SIZE (64 * 64 * 4)
+
+/** @{
+ * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
+*/
+#define VC4_TILING_FORMAT_LINEAR 0
+#define VC4_TILING_FORMAT_T 1
+#define VC4_TILING_FORMAT_LT 2
+/** @} */
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF BIT(3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL BIT(2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS BIT(1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR BIT(0)
+
+/** @{
+ *
+ * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
+ */
+
+#define VC4_LOADSTORE_TILE_BUFFER_EOF BIT(3)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS BIT(1)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR BIT(0)
+
+/** @} */
+
+/** @{
+ *
+ * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15)
+#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR BIT(14)
+#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR BIT(13)
+#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP BIT(12)
+
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8)
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8
+#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2
+/** @} */
+
+/** @{
+ *
+ * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6)
+#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6
+#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4)
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4
+
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0)
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0
+#define VC4_LOADSTORE_TILE_BUFFER_NONE 0
+#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1
+#define VC4_LOADSTORE_TILE_BUFFER_ZS 2
+#define VC4_LOADSTORE_TILE_BUFFER_Z 3
+#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4
+#define VC4_LOADSTORE_TILE_BUFFER_FULL 5
+/** @} */
+
+#define VC4_INDEX_BUFFER_U8 (0 << 4)
+#define VC4_INDEX_BUFFER_U16 (1 << 4)
+
+/* This flag is only present in NV shader state. */
+#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS BIT(3)
+#define VC4_SHADER_FLAG_ENABLE_CLIPPING BIT(2)
+#define VC4_SHADER_FLAG_VS_POINT_SIZE BIT(1)
+#define VC4_SHADER_FLAG_FS_SINGLE_THREAD BIT(0)
+
+/** @{ byte 2 of config bits. */
+#define VC4_CONFIG_BITS_EARLY_Z_UPDATE BIT(1)
+#define VC4_CONFIG_BITS_EARLY_Z BIT(0)
+/** @} */
+
+/** @{ byte 1 of config bits. */
+#define VC4_CONFIG_BITS_Z_UPDATE BIT(7)
+/** same values in this 3-bit field as PIPE_FUNC_* */
+#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4
+#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE BIT(3)
+
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1)
+
+#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT BIT(0)
+/** @} */
+
+/** @{ byte 0 of config bits. */
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6)
+
+#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES BIT(4)
+#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET BIT(3)
+#define VC4_CONFIG_BITS_CW_PRIMITIVES BIT(2)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK BIT(1)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT BIT(0)
+/** @} */
+
+/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
+#define VC4_BIN_CONFIG_DB_NON_MS BIT(7)
+
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3
+
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3
+
+#define VC4_BIN_CONFIG_AUTO_INIT_TSDA BIT(2)
+#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT BIT(1)
+#define VC4_BIN_CONFIG_MS_MODE_4X BIT(0)
+/** @} */
+
+/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
+#define VC4_RENDER_CONFIG_DB_NON_MS BIT(12)
+#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11)
+#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G BIT(10)
+#define VC4_RENDER_CONFIG_COVERAGE_MODE BIT(9)
+#define VC4_RENDER_CONFIG_ENABLE_VG_MASK BIT(8)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6)
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6
+
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4)
+
+#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2)
+#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2
+#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0
+#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1
+#define VC4_RENDER_CONFIG_FORMAT_BGR565 2
+
+#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT BIT(1)
+#define VC4_RENDER_CONFIG_MS_MODE_4X BIT(0)
+
+#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0)
+
+enum vc4_texture_data_type {
+ VC4_TEXTURE_TYPE_RGBA8888 = 0,
+ VC4_TEXTURE_TYPE_RGBX8888 = 1,
+ VC4_TEXTURE_TYPE_RGBA4444 = 2,
+ VC4_TEXTURE_TYPE_RGBA5551 = 3,
+ VC4_TEXTURE_TYPE_RGB565 = 4,
+ VC4_TEXTURE_TYPE_LUMINANCE = 5,
+ VC4_TEXTURE_TYPE_ALPHA = 6,
+ VC4_TEXTURE_TYPE_LUMALPHA = 7,
+ VC4_TEXTURE_TYPE_ETC1 = 8,
+ VC4_TEXTURE_TYPE_S16F = 9,
+ VC4_TEXTURE_TYPE_S8 = 10,
+ VC4_TEXTURE_TYPE_S16 = 11,
+ VC4_TEXTURE_TYPE_BW1 = 12,
+ VC4_TEXTURE_TYPE_A4 = 13,
+ VC4_TEXTURE_TYPE_A1 = 14,
+ VC4_TEXTURE_TYPE_RGBA64 = 15,
+ VC4_TEXTURE_TYPE_RGBA32R = 16,
+ VC4_TEXTURE_TYPE_YUV422R = 17,
+};
+
+#define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12)
+#define VC4_TEX_P0_OFFSET_SHIFT 12
+#define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10)
+#define VC4_TEX_P0_CSWIZ_SHIFT 10
+#define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9)
+#define VC4_TEX_P0_CMMODE_SHIFT 9
+#define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8)
+#define VC4_TEX_P0_FLIPY_SHIFT 8
+#define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4)
+#define VC4_TEX_P0_TYPE_SHIFT 4
+#define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0)
+#define VC4_TEX_P0_MIPLVLS_SHIFT 0
+
+#define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31)
+#define VC4_TEX_P1_TYPE4_SHIFT 31
+#define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20)
+#define VC4_TEX_P1_HEIGHT_SHIFT 20
+#define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19)
+#define VC4_TEX_P1_ETCFLIP_SHIFT 19
+#define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8)
+#define VC4_TEX_P1_WIDTH_SHIFT 8
+
+#define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7)
+#define VC4_TEX_P1_MAGFILT_SHIFT 7
+# define VC4_TEX_P1_MAGFILT_LINEAR 0
+# define VC4_TEX_P1_MAGFILT_NEAREST 1
+
+#define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4)
+#define VC4_TEX_P1_MINFILT_SHIFT 4
+# define VC4_TEX_P1_MINFILT_LINEAR 0
+# define VC4_TEX_P1_MINFILT_NEAREST 1
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3
+# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4
+# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5
+
+#define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2)
+#define VC4_TEX_P1_WRAP_T_SHIFT 2
+#define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0)
+#define VC4_TEX_P1_WRAP_S_SHIFT 0
+# define VC4_TEX_P1_WRAP_REPEAT 0
+# define VC4_TEX_P1_WRAP_CLAMP 1
+# define VC4_TEX_P1_WRAP_MIRROR 2
+# define VC4_TEX_P1_WRAP_BORDER 3
+
+#define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30)
+#define VC4_TEX_P2_PTYPE_SHIFT 30
+# define VC4_TEX_P2_PTYPE_IGNORED 0
+# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3
+
+/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */
+#define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12)
+#define VC4_TEX_P2_CMST_SHIFT 12
+#define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0)
+#define VC4_TEX_P2_BSLOD_SHIFT 0
+
+/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
+#define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12)
+#define VC4_TEX_P2_CHEIGHT_SHIFT 12
+#define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0)
+#define VC4_TEX_P2_CWIDTH_SHIFT 0
+
+/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
+#define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12)
+#define VC4_TEX_P2_CYOFF_SHIFT 12
+#define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0)
+#define VC4_TEX_P2_CXOFF_SHIFT 0
+
+#endif /* VC4_PACKET_H */
diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c
new file mode 100644
index 000000000..437e7a27f
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_perfmon.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Broadcom
+ */
+
+/**
+ * DOC: VC4 V3D performance monitor module
+ *
+ * The V3D block provides 16 hardware counters which can count various events.
+ */
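+
+/* Editor's sketch of the expected userspace flow (not part of the
+ * original sources): allocate a monitor with DRM_IOCTL_VC4_PERFMON_CREATE,
+ * reference the returned id from the perfmonid field of
+ * struct drm_vc4_submit_cl when submitting jobs, and read the accumulated
+ * counters back with DRM_IOCTL_VC4_PERFMON_GET_VALUES.
+ */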
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define VC4_PERFMONID_MIN 1
+#define VC4_PERFMONID_MAX U32_MAX
+
+void vc4_perfmon_get(struct vc4_perfmon *perfmon)
+{
+ if (perfmon)
+ refcount_inc(&perfmon->refcnt);
+}
+
+void vc4_perfmon_put(struct vc4_perfmon *perfmon)
+{
+ if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
+ kfree(perfmon);
+}
+
+void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon)
+{
+ unsigned int i;
+ u32 mask;
+
+ if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon))
+ return;
+
+ for (i = 0; i < perfmon->ncounters; i++)
+ V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]);
+
+ mask = GENMASK(perfmon->ncounters - 1, 0);
+ V3D_WRITE(V3D_PCTRC, mask);
+ V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask);
+ vc4->active_perfmon = perfmon;
+}
+
+void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
+ bool capture)
+{
+ unsigned int i;
+
+ if (WARN_ON_ONCE(!vc4->active_perfmon ||
+ perfmon != vc4->active_perfmon))
+ return;
+
+ if (capture) {
+ for (i = 0; i < perfmon->ncounters; i++)
+ perfmon->counters[i] += V3D_READ(V3D_PCTR(i));
+ }
+
+ V3D_WRITE(V3D_PCTRE, 0);
+ vc4->active_perfmon = NULL;
+}
+
+struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id)
+{
+ struct vc4_perfmon *perfmon;
+
+ mutex_lock(&vc4file->perfmon.lock);
+ perfmon = idr_find(&vc4file->perfmon.idr, id);
+ vc4_perfmon_get(perfmon);
+ mutex_unlock(&vc4file->perfmon.lock);
+
+ return perfmon;
+}
+
+void vc4_perfmon_open_file(struct vc4_file *vc4file)
+{
+ mutex_init(&vc4file->perfmon.lock);
+ idr_init(&vc4file->perfmon.idr);
+}
+
+static int vc4_perfmon_idr_del(int id, void *elem, void *data)
+{
+ struct vc4_perfmon *perfmon = elem;
+
+ vc4_perfmon_put(perfmon);
+
+ return 0;
+}
+
+void vc4_perfmon_close_file(struct vc4_file *vc4file)
+{
+ mutex_lock(&vc4file->perfmon.lock);
+ idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
+ idr_destroy(&vc4file->perfmon.idr);
+ mutex_unlock(&vc4file->perfmon.lock);
+}
+
+int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_file *vc4file = file_priv->driver_priv;
+ struct drm_vc4_perfmon_create *req = data;
+ struct vc4_perfmon *perfmon;
+ unsigned int i;
+ int ret;
+
+ /* Number of monitored counters cannot exceed HW limits. */
+ if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS ||
+ !req->ncounters)
+ return -EINVAL;
+
+ /* Make sure all events are valid. */
+ for (i = 0; i < req->ncounters; i++) {
+ if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS)
+ return -EINVAL;
+ }
+
+ perfmon = kzalloc(sizeof(*perfmon) + (req->ncounters * sizeof(u64)),
+ GFP_KERNEL);
+ if (!perfmon)
+ return -ENOMEM;
+
+ for (i = 0; i < req->ncounters; i++)
+ perfmon->events[i] = req->events[i];
+
+ perfmon->ncounters = req->ncounters;
+
+ refcount_set(&perfmon->refcnt, 1);
+
+ mutex_lock(&vc4file->perfmon.lock);
+ ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN,
+ VC4_PERFMONID_MAX, GFP_KERNEL);
+ mutex_unlock(&vc4file->perfmon.lock);
+
+ if (ret < 0) {
+ kfree(perfmon);
+ return ret;
+ }
+
+ req->id = ret;
+ return 0;
+}
+
+int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_file *vc4file = file_priv->driver_priv;
+ struct drm_vc4_perfmon_destroy *req = data;
+ struct vc4_perfmon *perfmon;
+
+ mutex_lock(&vc4file->perfmon.lock);
+ perfmon = idr_remove(&vc4file->perfmon.idr, req->id);
+ mutex_unlock(&vc4file->perfmon.lock);
+
+ if (!perfmon)
+ return -EINVAL;
+
+ vc4_perfmon_put(perfmon);
+ return 0;
+}
+
+int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_file *vc4file = file_priv->driver_priv;
+ struct drm_vc4_perfmon_get_values *req = data;
+ struct vc4_perfmon *perfmon;
+ int ret;
+
+ mutex_lock(&vc4file->perfmon.lock);
+ perfmon = idr_find(&vc4file->perfmon.idr, req->id);
+ vc4_perfmon_get(perfmon);
+ mutex_unlock(&vc4file->perfmon.lock);
+
+ if (!perfmon)
+ return -EINVAL;
+
+ if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters,
+ perfmon->ncounters * sizeof(u64)))
+ ret = -EFAULT;
+ else
+ ret = 0;
+
+ vc4_perfmon_put(perfmon);
+ return ret;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
new file mode 100644
index 000000000..39e608271
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**
+ * DOC: VC4 plane module
+ *
+ * Each DRM plane is a layer of pixels being scanned out by the HVS.
+ *
+ * At atomic modeset check time, we compute the HVS display element
+ * state that would be necessary for displaying the plane (giving us a
+ * chance to figure out if a plane configuration is invalid), then at
+ * atomic flush time the CRTC will ask us to write our element state
+ * into the region of the HVS that it has allocated for us.
+ */
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_plane_helper.h>
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+static const struct hvs_format {
+ u32 drm; /* DRM_FORMAT_* */
+ u32 hvs; /* HVS_FORMAT_* */
+ u32 pixel_order;
+} hvs_formats[] = {
+ {
+ .drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
+ },
+ {
+ .drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
+ },
+ {
+ .drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
+ .pixel_order = HVS_PIXEL_ORDER_ARGB,
+ },
+ {
+ .drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
+ .pixel_order = HVS_PIXEL_ORDER_ARGB,
+ },
+ {
+ .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565,
+ .pixel_order = HVS_PIXEL_ORDER_XRGB,
+ },
+ {
+ .drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565,
+ .pixel_order = HVS_PIXEL_ORDER_XBGR,
+ },
+ {
+ .drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
+ },
+ {
+ .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
+ },
+ {
+ .drm = DRM_FORMAT_RGB888, .hvs = HVS_PIXEL_FORMAT_RGB888,
+ .pixel_order = HVS_PIXEL_ORDER_XRGB,
+ },
+ {
+ .drm = DRM_FORMAT_BGR888, .hvs = HVS_PIXEL_FORMAT_RGB888,
+ .pixel_order = HVS_PIXEL_ORDER_XBGR,
+ },
+ {
+ .drm = DRM_FORMAT_YUV422,
+ .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
+ .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
+ },
+ {
+ .drm = DRM_FORMAT_YVU422,
+ .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
+ .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
+ },
+ {
+ .drm = DRM_FORMAT_YUV420,
+ .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
+ .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
+ },
+ {
+ .drm = DRM_FORMAT_YVU420,
+ .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
+ .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
+ },
+ {
+ .drm = DRM_FORMAT_NV12,
+ .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
+ .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
+ },
+ {
+ .drm = DRM_FORMAT_NV21,
+ .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
+ .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
+ },
+ {
+ .drm = DRM_FORMAT_NV16,
+ .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
+ .pixel_order = HVS_PIXEL_ORDER_XYCBCR,
+ },
+ {
+ .drm = DRM_FORMAT_NV61,
+ .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
+ .pixel_order = HVS_PIXEL_ORDER_XYCRCB,
+ },
+};
+
+static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
+ if (hvs_formats[i].drm == drm_format)
+ return &hvs_formats[i];
+ }
+
+ return NULL;
+}
+
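+/* Editor's note: as used below, PPF appears to be the HVS polyphase
+ * filter (upscaling) and TPZ the trapezoidal filter (downscaling).
+ */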
+static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
+{
+ if (dst > src)
+ return VC4_SCALING_PPF;
+ else if (dst < src)
+ return VC4_SCALING_TPZ;
+ else
+ return VC4_SCALING_NONE;
+}
+
+static bool plane_enabled(struct drm_plane_state *state)
+{
+ return state->fb && state->crtc;
+}
+
+static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
+{
+ struct vc4_plane_state *vc4_state;
+
+ if (WARN_ON(!plane->state))
+ return NULL;
+
+ vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
+ if (!vc4_state)
+ return NULL;
+
+ memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
+
+ __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
+
+ if (vc4_state->dlist) {
+ vc4_state->dlist = kmemdup(vc4_state->dlist,
+ vc4_state->dlist_count * 4,
+ GFP_KERNEL);
+ if (!vc4_state->dlist) {
+ kfree(vc4_state);
+ return NULL;
+ }
+ vc4_state->dlist_size = vc4_state->dlist_count;
+ }
+
+ return &vc4_state->base;
+}
+
+static void vc4_plane_destroy_state(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+ if (vc4_state->lbm.allocated) {
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
+ drm_mm_remove_node(&vc4_state->lbm);
+ spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
+ }
+
+ kfree(vc4_state->dlist);
+ __drm_atomic_helper_plane_destroy_state(&vc4_state->base);
+ kfree(state);
+}
+
+/* Called during init to allocate the plane's atomic state. */
+static void vc4_plane_reset(struct drm_plane *plane)
+{
+ struct vc4_plane_state *vc4_state;
+
+ WARN_ON(plane->state);
+
+ vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
+ if (!vc4_state)
+ return;
+
+ plane->state = &vc4_state->base;
+ plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE;
+ vc4_state->base.plane = plane;
+}
+
+static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
+{
+ if (vc4_state->dlist_count == vc4_state->dlist_size) {
+ u32 new_size = max(4u, vc4_state->dlist_count * 2);
+ u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
+
+ if (!new_dlist)
+ return;
+ memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
+
+ kfree(vc4_state->dlist);
+ vc4_state->dlist = new_dlist;
+ vc4_state->dlist_size = new_size;
+ }
+
+ vc4_state->dlist[vc4_state->dlist_count++] = val;
+}
+
+/* Returns the scl0/scl1 field based on whether the dimensions need to
+ * be up/down/non-scaled.
+ *
+ * This is a replication of a table from the spec.
+ */
+static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+ switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
+ case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
+ return SCALER_CTL0_SCL_H_PPF_V_PPF;
+ case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
+ return SCALER_CTL0_SCL_H_TPZ_V_PPF;
+ case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
+ return SCALER_CTL0_SCL_H_PPF_V_TPZ;
+ case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
+ return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
+ case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
+ return SCALER_CTL0_SCL_H_PPF_V_NONE;
+ case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
+ return SCALER_CTL0_SCL_H_NONE_V_PPF;
+ case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
+ return SCALER_CTL0_SCL_H_NONE_V_TPZ;
+ case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
+ return SCALER_CTL0_SCL_H_TPZ_V_NONE;
+ default:
+ case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
+ /* The unity case is independently handled by
+ * SCALER_CTL0_UNITY.
+ */
+ return 0;
+ }
+}
+
+static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
+{
+ struct drm_plane *plane = state->plane;
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ struct drm_framebuffer *fb = state->fb;
+ struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
+ u32 subpixel_src_mask = (1 << 16) - 1;
+ u32 format = fb->format->format;
+ int num_planes = fb->format->num_planes;
+ u32 h_subsample = 1;
+ u32 v_subsample = 1;
+ int i;
+
+ for (i = 0; i < num_planes; i++)
+ vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
+
+ /* We don't support subpixel source positioning for scaling. */
+ if ((state->src_x & subpixel_src_mask) ||
+ (state->src_y & subpixel_src_mask) ||
+ (state->src_w & subpixel_src_mask) ||
+ (state->src_h & subpixel_src_mask)) {
+ return -EINVAL;
+ }
+
+ vc4_state->src_x = state->src_x >> 16;
+ vc4_state->src_y = state->src_y >> 16;
+ vc4_state->src_w[0] = state->src_w >> 16;
+ vc4_state->src_h[0] = state->src_h >> 16;
+
+ vc4_state->crtc_x = state->crtc_x;
+ vc4_state->crtc_y = state->crtc_y;
+ vc4_state->crtc_w = state->crtc_w;
+ vc4_state->crtc_h = state->crtc_h;
+
+ vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
+ vc4_state->crtc_w);
+ vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
+ vc4_state->crtc_h);
+
+ vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
+ vc4_state->y_scaling[0] == VC4_SCALING_NONE);
+
+ if (num_planes > 1) {
+ vc4_state->is_yuv = true;
+
+ h_subsample = drm_format_horz_chroma_subsampling(format);
+ v_subsample = drm_format_vert_chroma_subsampling(format);
+ vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
+ vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
+
+ vc4_state->x_scaling[1] =
+ vc4_get_scaling_mode(vc4_state->src_w[1],
+ vc4_state->crtc_w);
+ vc4_state->y_scaling[1] =
+ vc4_get_scaling_mode(vc4_state->src_h[1],
+ vc4_state->crtc_h);
+
+ /* YUV conversion requires that horizontal scaling be enabled
+ * on the UV plane even if vc4_get_scaling_mode() returned
+ * VC4_SCALING_NONE (which can happen when the down-scaling
+ * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
+ * case.
+ */
+ if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
+ vc4_state->x_scaling[1] = VC4_SCALING_PPF;
+ } else {
+ vc4_state->is_yuv = false;
+ vc4_state->x_scaling[1] = VC4_SCALING_NONE;
+ vc4_state->y_scaling[1] = VC4_SCALING_NONE;
+ }
+
+	/* No configuring scaling on the cursor plane, since it gets
+	 * non-vblank-synced updates, and scaling requires LBM changes
+	 * which have to be vblank-synced.
+	 */
+ if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity)
+ return -EINVAL;
+
+ /* Clamp the on-screen start x/y to 0. The hardware doesn't
+ * support negative y, and negative x wastes bandwidth.
+ */
+ if (vc4_state->crtc_x < 0) {
+ for (i = 0; i < num_planes; i++) {
+ u32 cpp = fb->format->cpp[i];
+ u32 subs = ((i == 0) ? 1 : h_subsample);
+
+ vc4_state->offsets[i] += (cpp *
+ (-vc4_state->crtc_x) / subs);
+ }
+ vc4_state->src_w[0] += vc4_state->crtc_x;
+ vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample;
+ vc4_state->crtc_x = 0;
+ }
+
+ if (vc4_state->crtc_y < 0) {
+ for (i = 0; i < num_planes; i++) {
+ u32 subs = ((i == 0) ? 1 : v_subsample);
+
+ vc4_state->offsets[i] += (fb->pitches[i] *
+ (-vc4_state->crtc_y) / subs);
+ }
+ vc4_state->src_h[0] += vc4_state->crtc_y;
+ vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample;
+ vc4_state->crtc_y = 0;
+ }
+
+ return 0;
+}
+
+static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
+{
+ u32 scale, recip;
+
+ scale = (1 << 16) * src / dst;
+
+ /* The specs note that while the reciprocal would be defined
+ * as (1<<32)/scale, ~0 is close enough.
+ */
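+	/* Editor's worked example: a 2:1 downscale has scale = 0x20000,
+	 * so recip = 0xffffffff / 0x20000 = 0x7fff, versus the exact
+	 * (1 << 32) / 0x20000 = 0x8000.
+	 */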
+ recip = ~0 / scale;
+
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
+ VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
+}
+
+static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
+{
+ u32 scale = (1 << 16) * src / dst;
+
+ vc4_dlist_write(vc4_state,
+ SCALER_PPF_AGC |
+ VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
+ VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
+}
+
+static u32 vc4_lbm_size(struct drm_plane_state *state)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ /* This is the worst case number. One of the two sizes will
+ * be used depending on the scaling configuration.
+ */
+ u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w);
+ u32 lbm;
+
+ if (!vc4_state->is_yuv) {
+ if (vc4_state->is_unity)
+ return 0;
+ else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
+ lbm = pix_per_line * 8;
+ else {
+ /* In special cases, this multiplier might be 12. */
+ lbm = pix_per_line * 16;
+ }
+ } else {
+ /* There are cases for this going down to a multiplier
+ * of 2, but according to the firmware source, the
+ * table in the docs is somewhat wrong.
+ */
+ lbm = pix_per_line * 16;
+ }
+
+ lbm = roundup(lbm, 32);
+
+ return lbm;
+}
+
+static void vc4_write_scaling_parameters(struct drm_plane_state *state,
+ int channel)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+ /* Ch0 H-PPF Word 0: Scaling Parameters */
+ if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
+ vc4_write_ppf(vc4_state,
+ vc4_state->src_w[channel], vc4_state->crtc_w);
+ }
+
+ /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
+ if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
+ vc4_write_ppf(vc4_state,
+ vc4_state->src_h[channel], vc4_state->crtc_h);
+ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+ }
+
+ /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
+ if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
+ vc4_write_tpz(vc4_state,
+ vc4_state->src_w[channel], vc4_state->crtc_w);
+ }
+
+ /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
+ if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
+ vc4_write_tpz(vc4_state,
+ vc4_state->src_h[channel], vc4_state->crtc_h);
+ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+ }
+}
+
+/* Writes out a full display list for an active plane to the plane's
+ * private dlist state.
+ */
+static int vc4_plane_mode_set(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ struct drm_framebuffer *fb = state->fb;
+ u32 ctl0_offset = vc4_state->dlist_count;
+ const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
+ u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
+ int num_planes = drm_format_num_planes(format->drm);
+ bool mix_plane_alpha;
+ bool covers_screen;
+ u32 scl0, scl1, pitch0;
+ u32 lbm_size, tiling;
+ unsigned long irqflags;
+ u32 hvs_format = format->hvs;
+ int ret, i;
+
+ ret = vc4_plane_setup_clipping_and_scaling(state);
+ if (ret)
+ return ret;
+
+ /* Allocate the LBM memory that the HVS will use for temporary
+ * storage due to our scaling/format conversion.
+ */
+ lbm_size = vc4_lbm_size(state);
+ if (lbm_size) {
+ if (!vc4_state->lbm.allocated) {
+ spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
+ ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
+ &vc4_state->lbm,
+ lbm_size, 32, 0, 0);
+ spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
+ } else {
+ WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
+ }
+ }
+
+ if (ret)
+ return ret;
+
+ /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
+ * and 4:4:4, scl1 should be set to scl0 so both channels of
+ * the scaler do the same thing. For YUV, the Y plane needs
+ * to be put in channel 1 and Cb/Cr in channel 0, so we swap
+ * the scl fields here.
+ */
+ if (num_planes == 1) {
+ scl0 = vc4_get_scl_field(state, 0);
+ scl1 = scl0;
+ } else {
+ scl0 = vc4_get_scl_field(state, 1);
+ scl1 = vc4_get_scl_field(state, 0);
+ }
+
+ switch (base_format_mod) {
+ case DRM_FORMAT_MOD_LINEAR:
+ tiling = SCALER_CTL0_TILING_LINEAR;
+ pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
+ break;
+
+ case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
+ /* For T-tiled, the FB pitch is "how many bytes from
+ * one row to the next, such that pitch * tile_h ==
+ * tile_size * tiles_per_row."
+ */
+ u32 tile_size_shift = 12; /* T tiles are 4kb */
+ u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
+ u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
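+		/* Editor's example: a 1920-wide 32bpp framebuffer has a
+		 * 7680-byte pitch, so tiles_w = 7680 >> 7 = 60, matching
+		 * 1920 / 32 pixels per T-tile.
+		 */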
+
+ tiling = SCALER_CTL0_TILING_256B_OR_T;
+
+ pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET) |
+ VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L) |
+ VC4_SET_FIELD(tiles_w, SCALER_PITCH0_TILE_WIDTH_R));
+ break;
+ }
+
+ case DRM_FORMAT_MOD_BROADCOM_SAND64:
+ case DRM_FORMAT_MOD_BROADCOM_SAND128:
+ case DRM_FORMAT_MOD_BROADCOM_SAND256: {
+ uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
+
+ /* Column-based NV12 or RGBA.
+ */
+ if (fb->format->num_planes > 1) {
+ if (hvs_format != HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE) {
+ DRM_DEBUG_KMS("SAND format only valid for NV12/21");
+ return -EINVAL;
+ }
+ hvs_format = HVS_PIXEL_FORMAT_H264;
+ } else {
+ if (base_format_mod == DRM_FORMAT_MOD_BROADCOM_SAND256) {
+ DRM_DEBUG_KMS("SAND256 format only valid for H.264");
+ return -EINVAL;
+ }
+ }
+
+ switch (base_format_mod) {
+ case DRM_FORMAT_MOD_BROADCOM_SAND64:
+ tiling = SCALER_CTL0_TILING_64B;
+ break;
+ case DRM_FORMAT_MOD_BROADCOM_SAND128:
+ tiling = SCALER_CTL0_TILING_128B;
+ break;
+ case DRM_FORMAT_MOD_BROADCOM_SAND256:
+ tiling = SCALER_CTL0_TILING_256B_OR_T;
+ break;
+ default:
+ break;
+ }
+
+ if (param > SCALER_TILE_HEIGHT_MASK) {
+ DRM_DEBUG_KMS("SAND height too large (%d)\n", param);
+ return -EINVAL;
+ }
+
+ pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
+ break;
+ }
+
+ default:
+ DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
+ (long long)fb->modifier);
+ return -EINVAL;
+ }
+
+ /* Control word */
+ vc4_dlist_write(vc4_state,
+ SCALER_CTL0_VALID |
+ VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
+ (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
+ (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
+ VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
+ (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
+ VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
+ VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
+
+ /* Position Word 0: Image Positions and Alpha Value */
+ vc4_state->pos0_offset = vc4_state->dlist_count;
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
+ VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
+ VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
+
+ /* Position Word 1: Scaled Image Dimensions. */
+ if (!vc4_state->is_unity) {
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(vc4_state->crtc_w,
+ SCALER_POS1_SCL_WIDTH) |
+ VC4_SET_FIELD(vc4_state->crtc_h,
+ SCALER_POS1_SCL_HEIGHT));
+ }
+
+ /* Don't waste cycles mixing with plane alpha if the set alpha
+ * is opaque or there is no per-pixel alpha information.
+ * In any case we use the alpha property value as the fixed alpha.
+ */
+ mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
+ fb->format->has_alpha;
+
+ /* Position Word 2: Source Image Size, Alpha */
+ vc4_state->pos2_offset = vc4_state->dlist_count;
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(fb->format->has_alpha ?
+ SCALER_POS2_ALPHA_MODE_PIPELINE :
+ SCALER_POS2_ALPHA_MODE_FIXED,
+ SCALER_POS2_ALPHA_MODE) |
+ (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
+ (fb->format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0) |
+ VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) |
+ VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT));
+
+ /* Position Word 3: Context. Written by the HVS. */
+ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+
+
+ *
+ * The pointers may be any byte address.
+ */
+ vc4_state->ptr0_offset = vc4_state->dlist_count;
+ for (i = 0; i < num_planes; i++)
+ vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
+
+ /* Pointer Context Word 0/1/2: Written by the HVS */
+ for (i = 0; i < num_planes; i++)
+ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+
+ /* Pitch word 0 */
+ vc4_dlist_write(vc4_state, pitch0);
+
+ /* Pitch word 1/2 */
+ for (i = 1; i < num_planes; i++) {
+ if (hvs_format != HVS_PIXEL_FORMAT_H264) {
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(fb->pitches[i],
+ SCALER_SRC_PITCH));
+ } else {
+ vc4_dlist_write(vc4_state, pitch0);
+ }
+ }
+
+ /* Colorspace conversion words */
+ if (vc4_state->is_yuv) {
+ vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
+ vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
+ vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
+ }
+
+ if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
+ /* LBM Base Address. */
+ if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
+ vc4_dlist_write(vc4_state, vc4_state->lbm.start);
+ }
+
+ if (num_planes > 1) {
+ /* Emit Cb/Cr as channel 0 and Y as channel
+ * 1. This matches how we set up scl0/scl1
+ * above.
+ */
+ vc4_write_scaling_parameters(state, 1);
+ }
+ vc4_write_scaling_parameters(state, 0);
+
+ /* If any PPF setup was done, then all the kernel
+ * pointers get uploaded.
+ */
+ if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
+ vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
+ vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
+ vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
+ u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
+ SCALER_PPF_KERNEL_OFFSET);
+
+ /* HPPF plane 0 */
+ vc4_dlist_write(vc4_state, kernel);
+ /* VPPF plane 0 */
+ vc4_dlist_write(vc4_state, kernel);
+ /* HPPF plane 1 */
+ vc4_dlist_write(vc4_state, kernel);
+ /* VPPF plane 1 */
+ vc4_dlist_write(vc4_state, kernel);
+ }
+ }
+
+ vc4_state->dlist[ctl0_offset] |=
+ VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
+
+ /* crtc_* are already clipped coordinates. */
+ covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
+ vc4_state->crtc_w == state->crtc->mode.hdisplay &&
+ vc4_state->crtc_h == state->crtc->mode.vdisplay;
+ /* Background fill might be necessary when the plane has per-pixel
+ * alpha content or a non-opaque plane alpha and could blend from the
+ * background or does not cover the entire screen.
+ */
+ vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
+ state->alpha != DRM_BLEND_ALPHA_OPAQUE;
+
+ return 0;
+}
+
+/* If a modeset involves changing the setup of a plane, the atomic
+ * infrastructure will call this to validate a proposed plane setup.
+ * However, if a plane isn't getting updated, this (and the
+ * corresponding vc4_plane_atomic_update) won't get called. Thus, we
+ * compute the dlist here and have all active plane dlists get updated
+ * in the CRTC's flush.
+ */
+static int vc4_plane_atomic_check(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+ vc4_state->dlist_count = 0;
+
+ if (plane_enabled(state))
+ return vc4_plane_mode_set(plane, state);
+ else
+ return 0;
+}
+
+static void vc4_plane_atomic_update(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ /* No contents here. Since we don't know where in the CRTC's
+ * dlist we should be stored, our dlist is uploaded to the
+ * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
+ * time.
+ */
+}
+
+u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
+ int i;
+
+ vc4_state->hw_dlist = dlist;
+
+ /* Can't memcpy_toio() because it needs to be 32-bit writes. */
+ for (i = 0; i < vc4_state->dlist_count; i++)
+ writel(vc4_state->dlist[i], &dlist[i]);
+
+ return vc4_state->dlist_count;
+}
+
+u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
+{
+ const struct vc4_plane_state *vc4_state =
+ container_of(state, typeof(*vc4_state), base);
+
+ return vc4_state->dlist_count;
+}
+
+/* Updates the plane to immediately (well, once the FIFO needs
+ * refilling) scan out from a new framebuffer.
+ */
+void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
+ struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
+ uint32_t addr;
+
+ /* We're skipping the address adjustment for negative origin,
+ * because this is only called on the primary plane.
+ */
+ WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
+ addr = bo->paddr + fb->offsets[0];
+
+ /* Write the new address into the hardware immediately. The
+ * scanout will start from this address as soon as the FIFO
+ * needs to refill with pixels.
+ */
+ writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
+
+ /* Also update the CPU-side dlist copy, so that any later
+ * atomic updates that don't do a new modeset on our plane
+ * also use our updated address.
+ */
+ vc4_state->dlist[vc4_state->ptr0_offset] = addr;
+}
+
+static void vc4_plane_atomic_async_update(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
+
+ if (plane->state->fb != state->fb) {
+ vc4_plane_async_set_fb(plane, state->fb);
+ drm_atomic_set_fb_for_plane(plane->state, state->fb);
+ }
+
+ swap(plane->state->fb, state->fb);
+ /* Set the cursor's position on the screen. This is the
+ * expected change from the drm_mode_cursor_universal()
+ * helper.
+ */
+ plane->state->crtc_x = state->crtc_x;
+ plane->state->crtc_y = state->crtc_y;
+
+ /* Allow changing the start position within the cursor BO, if
+ * that matters.
+ */
+ plane->state->src_x = state->src_x;
+ plane->state->src_y = state->src_y;
+
+ /* Update the display list based on the new crtc_x/y. */
+ vc4_plane_atomic_check(plane, plane->state);
+
+ /* Note that we can't just call vc4_plane_write_dlist()
+ * because that would smash the context data that the HVS is
+ * currently using.
+ */
+ writel(vc4_state->dlist[vc4_state->pos0_offset],
+ &vc4_state->hw_dlist[vc4_state->pos0_offset]);
+ writel(vc4_state->dlist[vc4_state->pos2_offset],
+ &vc4_state->hw_dlist[vc4_state->pos2_offset]);
+ writel(vc4_state->dlist[vc4_state->ptr0_offset],
+ &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
+}
+
+static int vc4_plane_atomic_async_check(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ /* No configuring new scaling in the fast path. */
+ if (plane->state->crtc_w != state->crtc_w ||
+ plane->state->crtc_h != state->crtc_h ||
+ plane->state->src_w != state->src_w ||
+ plane->state->src_h != state->src_h)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int vc4_prepare_fb(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct vc4_bo *bo;
+ struct dma_fence *fence;
+ int ret;
+
+ if (!state->fb)
+ return 0;
+
+ bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
+
+ fence = reservation_object_get_excl_rcu(bo->resv);
+ drm_atomic_set_fence_for_plane(state, fence);
+
+ if (plane->state->fb == state->fb)
+ return 0;
+
+ ret = vc4_bo_inc_usecnt(bo);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void vc4_cleanup_fb(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct vc4_bo *bo;
+
+ if (plane->state->fb == state->fb || !state->fb)
+ return;
+
+ bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
+ vc4_bo_dec_usecnt(bo);
+}
+
+static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
+ .atomic_check = vc4_plane_atomic_check,
+ .atomic_update = vc4_plane_atomic_update,
+ .prepare_fb = vc4_prepare_fb,
+ .cleanup_fb = vc4_cleanup_fb,
+ .atomic_async_check = vc4_plane_atomic_async_check,
+ .atomic_async_update = vc4_plane_atomic_async_update,
+};
+
+static void vc4_plane_destroy(struct drm_plane *plane)
+{
+ drm_plane_helper_disable(plane, NULL);
+ drm_plane_cleanup(plane);
+}
+
+static bool vc4_format_mod_supported(struct drm_plane *plane,
+ uint32_t format,
+ uint64_t modifier)
+{
+ /* Support T_TILING for RGB formats only. */
+ switch (format) {
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ case DRM_FORMAT_ABGR8888:
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_RGB565:
+ case DRM_FORMAT_BGR565:
+ case DRM_FORMAT_ARGB1555:
+ case DRM_FORMAT_XRGB1555:
+ switch (fourcc_mod_broadcom_mod(modifier)) {
+ case DRM_FORMAT_MOD_LINEAR:
+ case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
+ case DRM_FORMAT_MOD_BROADCOM_SAND64:
+ case DRM_FORMAT_MOD_BROADCOM_SAND128:
+ return true;
+ default:
+ return false;
+ }
+ case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV21:
+ switch (fourcc_mod_broadcom_mod(modifier)) {
+ case DRM_FORMAT_MOD_LINEAR:
+ case DRM_FORMAT_MOD_BROADCOM_SAND64:
+ case DRM_FORMAT_MOD_BROADCOM_SAND128:
+ case DRM_FORMAT_MOD_BROADCOM_SAND256:
+ return true;
+ default:
+ return false;
+ }
+ case DRM_FORMAT_YUV422:
+ case DRM_FORMAT_YVU422:
+ case DRM_FORMAT_YUV420:
+ case DRM_FORMAT_YVU420:
+ case DRM_FORMAT_NV16:
+ case DRM_FORMAT_NV61:
+ default:
+ return (modifier == DRM_FORMAT_MOD_LINEAR);
+ }
+}
+
+static const struct drm_plane_funcs vc4_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ .destroy = vc4_plane_destroy,
+ .set_property = NULL,
+ .reset = vc4_plane_reset,
+ .atomic_duplicate_state = vc4_plane_duplicate_state,
+ .atomic_destroy_state = vc4_plane_destroy_state,
+ .format_mod_supported = vc4_format_mod_supported,
+};
+
+struct drm_plane *vc4_plane_init(struct drm_device *dev,
+ enum drm_plane_type type)
+{
+ struct drm_plane *plane = NULL;
+ struct vc4_plane *vc4_plane;
+ u32 formats[ARRAY_SIZE(hvs_formats)];
+ u32 num_formats = 0;
+ int ret = 0;
+ unsigned i;
+ static const uint64_t modifiers[] = {
+ DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+ DRM_FORMAT_MOD_BROADCOM_SAND128,
+ DRM_FORMAT_MOD_BROADCOM_SAND64,
+ DRM_FORMAT_MOD_BROADCOM_SAND256,
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID
+ };
+
+ vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
+ GFP_KERNEL);
+ if (!vc4_plane)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
+ /* Don't allow YUV in cursor planes, since that means
+		 * turning on the scaler, which we don't allow for the
+ * cursor.
+ */
+ if (type != DRM_PLANE_TYPE_CURSOR ||
+ hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) {
+ formats[num_formats++] = hvs_formats[i].drm;
+ }
+ }
+ plane = &vc4_plane->base;
+ ret = drm_universal_plane_init(dev, plane, 0,
+ &vc4_plane_funcs,
+ formats, num_formats,
+ modifiers, type, NULL);
+
+ drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
+
+ drm_plane_create_alpha_property(plane);
+
+ return plane;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
new file mode 100644
index 000000000..f4e795a0d
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_QPU_DEFINES_H
+#define VC4_QPU_DEFINES_H
+
+enum qpu_op_add {
+ QPU_A_NOP,
+ QPU_A_FADD,
+ QPU_A_FSUB,
+ QPU_A_FMIN,
+ QPU_A_FMAX,
+ QPU_A_FMINABS,
+ QPU_A_FMAXABS,
+ QPU_A_FTOI,
+ QPU_A_ITOF,
+ QPU_A_ADD = 12,
+ QPU_A_SUB,
+ QPU_A_SHR,
+ QPU_A_ASR,
+ QPU_A_ROR,
+ QPU_A_SHL,
+ QPU_A_MIN,
+ QPU_A_MAX,
+ QPU_A_AND,
+ QPU_A_OR,
+ QPU_A_XOR,
+ QPU_A_NOT,
+ QPU_A_CLZ,
+ QPU_A_V8ADDS = 30,
+ QPU_A_V8SUBS = 31,
+};
+
+enum qpu_op_mul {
+ QPU_M_NOP,
+ QPU_M_FMUL,
+ QPU_M_MUL24,
+ QPU_M_V8MULD,
+ QPU_M_V8MIN,
+ QPU_M_V8MAX,
+ QPU_M_V8ADDS,
+ QPU_M_V8SUBS,
+};
+
+enum qpu_raddr {
+ QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
+ /* 0-31 are the plain regfile a or b fields */
+ QPU_R_UNIF = 32,
+ QPU_R_VARY = 35,
+ QPU_R_ELEM_QPU = 38,
+ QPU_R_NOP,
+ QPU_R_XY_PIXEL_COORD = 41,
+ QPU_R_MS_REV_FLAGS = 42,
+ QPU_R_VPM = 48,
+ QPU_R_VPM_LD_BUSY,
+ QPU_R_VPM_LD_WAIT,
+ QPU_R_MUTEX_ACQUIRE,
+};
+
+enum qpu_waddr {
+ /* 0-31 are the plain regfile a or b fields */
+ QPU_W_ACC0 = 32, /* aka r0 */
+ QPU_W_ACC1,
+ QPU_W_ACC2,
+ QPU_W_ACC3,
+ QPU_W_TMU_NOSWAP,
+ QPU_W_ACC5,
+ QPU_W_HOST_INT,
+ QPU_W_NOP,
+ QPU_W_UNIFORMS_ADDRESS,
+ QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
+ QPU_W_MS_FLAGS = 42,
+ QPU_W_REV_FLAG = 42,
+ QPU_W_TLB_STENCIL_SETUP = 43,
+ QPU_W_TLB_Z,
+ QPU_W_TLB_COLOR_MS,
+ QPU_W_TLB_COLOR_ALL,
+ QPU_W_TLB_ALPHA_MASK,
+ QPU_W_VPM,
+ QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
+ QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
+ QPU_W_MUTEX_RELEASE,
+ QPU_W_SFU_RECIP,
+ QPU_W_SFU_RECIPSQRT,
+ QPU_W_SFU_EXP,
+ QPU_W_SFU_LOG,
+ QPU_W_TMU0_S,
+ QPU_W_TMU0_T,
+ QPU_W_TMU0_R,
+ QPU_W_TMU0_B,
+ QPU_W_TMU1_S,
+ QPU_W_TMU1_T,
+ QPU_W_TMU1_R,
+ QPU_W_TMU1_B,
+};
+
+enum qpu_sig_bits {
+ QPU_SIG_SW_BREAKPOINT,
+ QPU_SIG_NONE,
+ QPU_SIG_THREAD_SWITCH,
+ QPU_SIG_PROG_END,
+ QPU_SIG_WAIT_FOR_SCOREBOARD,
+ QPU_SIG_SCOREBOARD_UNLOCK,
+ QPU_SIG_LAST_THREAD_SWITCH,
+ QPU_SIG_COVERAGE_LOAD,
+ QPU_SIG_COLOR_LOAD,
+ QPU_SIG_COLOR_LOAD_END,
+ QPU_SIG_LOAD_TMU0,
+ QPU_SIG_LOAD_TMU1,
+ QPU_SIG_ALPHA_MASK_LOAD,
+ QPU_SIG_SMALL_IMM,
+ QPU_SIG_LOAD_IMM,
+ QPU_SIG_BRANCH
+};
+
+enum qpu_mux {
+ /* hardware mux values */
+ QPU_MUX_R0,
+ QPU_MUX_R1,
+ QPU_MUX_R2,
+ QPU_MUX_R3,
+ QPU_MUX_R4,
+ QPU_MUX_R5,
+ QPU_MUX_A,
+ QPU_MUX_B,
+
+ /* non-hardware mux values */
+ QPU_MUX_IMM,
+};
+
+enum qpu_cond {
+ QPU_COND_NEVER,
+ QPU_COND_ALWAYS,
+ QPU_COND_ZS,
+ QPU_COND_ZC,
+ QPU_COND_NS,
+ QPU_COND_NC,
+ QPU_COND_CS,
+ QPU_COND_CC,
+};
+
+enum qpu_pack_mul {
+ QPU_PACK_MUL_NOP,
+ /* replicated to each 8 bits of the 32-bit dst. */
+ QPU_PACK_MUL_8888 = 3,
+ QPU_PACK_MUL_8A,
+ QPU_PACK_MUL_8B,
+ QPU_PACK_MUL_8C,
+ QPU_PACK_MUL_8D,
+};
+
+enum qpu_pack_a {
+ QPU_PACK_A_NOP,
+ /* convert to 16 bit float if float input, or to int16. */
+ QPU_PACK_A_16A,
+ QPU_PACK_A_16B,
+ /* replicated to each 8 bits of the 32-bit dst. */
+ QPU_PACK_A_8888,
+ /* Convert to 8-bit unsigned int. */
+ QPU_PACK_A_8A,
+ QPU_PACK_A_8B,
+ QPU_PACK_A_8C,
+ QPU_PACK_A_8D,
+
+ /* Saturating variants of the previous instructions. */
+ QPU_PACK_A_32_SAT, /* int-only */
+ QPU_PACK_A_16A_SAT, /* int or float */
+ QPU_PACK_A_16B_SAT,
+ QPU_PACK_A_8888_SAT,
+ QPU_PACK_A_8A_SAT,
+ QPU_PACK_A_8B_SAT,
+ QPU_PACK_A_8C_SAT,
+ QPU_PACK_A_8D_SAT,
+};
+
+enum qpu_unpack_r4 {
+ QPU_UNPACK_R4_NOP,
+ QPU_UNPACK_R4_F16A_TO_F32,
+ QPU_UNPACK_R4_F16B_TO_F32,
+ QPU_UNPACK_R4_8D_REP,
+ QPU_UNPACK_R4_8A,
+ QPU_UNPACK_R4_8B,
+ QPU_UNPACK_R4_8C,
+ QPU_UNPACK_R4_8D,
+};
+
+#define QPU_MASK(high, low) \
+ ((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
+
+#define QPU_GET_FIELD(word, field) \
+ ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
+
+#define QPU_SIG_SHIFT 60
+#define QPU_SIG_MASK QPU_MASK(63, 60)
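+/* Editor's illustration: QPU_GET_FIELD(inst, QPU_SIG) extracts bits
+ * 63:60 of a 64-bit instruction word, e.g. yielding QPU_SIG_LOAD_IMM
+ * for a load-immediate instruction.
+ */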
+
+#define QPU_UNPACK_SHIFT 57
+#define QPU_UNPACK_MASK QPU_MASK(59, 57)
+
+/**
+ * If set, the pack field means PACK_MUL or R4 packing, instead of normal
+ * regfile a packing.
+ */
+#define QPU_PM ((uint64_t)1 << 56)
+
+#define QPU_PACK_SHIFT 52
+#define QPU_PACK_MASK QPU_MASK(55, 52)
+
+#define QPU_COND_ADD_SHIFT 49
+#define QPU_COND_ADD_MASK QPU_MASK(51, 49)
+#define QPU_COND_MUL_SHIFT 46
+#define QPU_COND_MUL_MASK QPU_MASK(48, 46)
+
+#define QPU_BRANCH_COND_SHIFT 52
+#define QPU_BRANCH_COND_MASK QPU_MASK(55, 52)
+
+#define QPU_BRANCH_REL ((uint64_t)1 << 51)
+#define QPU_BRANCH_REG ((uint64_t)1 << 50)
+
+#define QPU_BRANCH_RADDR_A_SHIFT 45
+#define QPU_BRANCH_RADDR_A_MASK QPU_MASK(49, 45)
+
+#define QPU_SF ((uint64_t)1 << 45)
+
+#define QPU_WADDR_ADD_SHIFT 38
+#define QPU_WADDR_ADD_MASK QPU_MASK(43, 38)
+#define QPU_WADDR_MUL_SHIFT 32
+#define QPU_WADDR_MUL_MASK QPU_MASK(37, 32)
+
+#define QPU_OP_MUL_SHIFT 29
+#define QPU_OP_MUL_MASK QPU_MASK(31, 29)
+
+#define QPU_RADDR_A_SHIFT 18
+#define QPU_RADDR_A_MASK QPU_MASK(23, 18)
+#define QPU_RADDR_B_SHIFT 12
+#define QPU_RADDR_B_MASK QPU_MASK(17, 12)
+#define QPU_SMALL_IMM_SHIFT 12
+#define QPU_SMALL_IMM_MASK QPU_MASK(17, 12)
+
+#define QPU_ADD_A_SHIFT 9
+#define QPU_ADD_A_MASK QPU_MASK(11, 9)
+#define QPU_ADD_B_SHIFT 6
+#define QPU_ADD_B_MASK QPU_MASK(8, 6)
+#define QPU_MUL_A_SHIFT 3
+#define QPU_MUL_A_MASK QPU_MASK(5, 3)
+#define QPU_MUL_B_SHIFT 0
+#define QPU_MUL_B_MASK QPU_MASK(2, 0)
+
+#define QPU_WS ((uint64_t)1 << 44)
+
+#define QPU_OP_ADD_SHIFT 24
+#define QPU_OP_ADD_MASK QPU_MASK(28, 24)
+
+#define QPU_LOAD_IMM_SHIFT 0
+#define QPU_LOAD_IMM_MASK QPU_MASK(31, 0)
+
+#define QPU_BRANCH_TARGET_SHIFT 0
+#define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0)
+
+#endif /* VC4_QPU_DEFINES_H */
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
new file mode 100644
index 000000000..d6864fa4b
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -0,0 +1,1051 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef VC4_REGS_H
+#define VC4_REGS_H
+
+#include <linux/bitops.h>
+
+#define VC4_MASK(high, low) ((u32)GENMASK(high, low))
+/* Using the GNU statement expression extension */
+#define VC4_SET_FIELD(value, field) \
+ ({ \
+ uint32_t fieldval = (value) << field##_SHIFT; \
+ WARN_ON((fieldval & ~field##_MASK) != 0); \
+ fieldval & field##_MASK; \
+ })
+
+#define VC4_GET_FIELD(word, field) (((word) & field##_MASK) >> \
+ field##_SHIFT)
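+
+/* Editor's illustration: V3D_IDENT1_NSLC below has mask VC4_MASK(7, 4)
+ * and shift 4, so VC4_SET_FIELD(3, V3D_IDENT1_NSLC) evaluates to 0x30
+ * and VC4_GET_FIELD(0x30, V3D_IDENT1_NSLC) returns 3.
+ */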
+
+#define V3D_IDENT0 0x00000
+# define V3D_EXPECTED_IDENT0 \
+ ((2 << 24) | \
+ ('V' << 0) | \
+ ('3' << 8) | \
+ ('D' << 16))
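+/* Editor's note: this works out to 0x02443356, the ASCII bytes "V3D"
+ * in the low 24 bits with a version field of 2 in the top byte.
+ */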
+
+#define V3D_IDENT1 0x00004
+/* Multiples of 1kb */
+# define V3D_IDENT1_VPM_SIZE_MASK VC4_MASK(31, 28)
+# define V3D_IDENT1_VPM_SIZE_SHIFT 28
+# define V3D_IDENT1_NSEM_MASK VC4_MASK(23, 16)
+# define V3D_IDENT1_NSEM_SHIFT 16
+# define V3D_IDENT1_TUPS_MASK VC4_MASK(15, 12)
+# define V3D_IDENT1_TUPS_SHIFT 12
+# define V3D_IDENT1_QUPS_MASK VC4_MASK(11, 8)
+# define V3D_IDENT1_QUPS_SHIFT 8
+# define V3D_IDENT1_NSLC_MASK VC4_MASK(7, 4)
+# define V3D_IDENT1_NSLC_SHIFT 4
+# define V3D_IDENT1_REV_MASK VC4_MASK(3, 0)
+# define V3D_IDENT1_REV_SHIFT 0
+
+#define V3D_IDENT2 0x00008
+#define V3D_SCRATCH 0x00010
+#define V3D_L2CACTL 0x00020
+# define V3D_L2CACTL_L2CCLR BIT(2)
+# define V3D_L2CACTL_L2CDIS BIT(1)
+# define V3D_L2CACTL_L2CENA BIT(0)
+
+#define V3D_SLCACTL 0x00024
+# define V3D_SLCACTL_T1CC_MASK VC4_MASK(27, 24)
+# define V3D_SLCACTL_T1CC_SHIFT 24
+# define V3D_SLCACTL_T0CC_MASK VC4_MASK(19, 16)
+# define V3D_SLCACTL_T0CC_SHIFT 16
+# define V3D_SLCACTL_UCC_MASK VC4_MASK(11, 8)
+# define V3D_SLCACTL_UCC_SHIFT 8
+# define V3D_SLCACTL_ICC_MASK VC4_MASK(3, 0)
+# define V3D_SLCACTL_ICC_SHIFT 0
+
+#define V3D_INTCTL 0x00030
+#define V3D_INTENA 0x00034
+#define V3D_INTDIS 0x00038
+# define V3D_INT_SPILLUSE BIT(3)
+# define V3D_INT_OUTOMEM BIT(2)
+# define V3D_INT_FLDONE BIT(1)
+# define V3D_INT_FRDONE BIT(0)
+
+#define V3D_CT0CS 0x00100
+#define V3D_CT1CS 0x00104
+#define V3D_CTNCS(n) (V3D_CT0CS + 4 * n)
+# define V3D_CTRSTA BIT(15)
+# define V3D_CTSEMA BIT(12)
+# define V3D_CTRTSD BIT(8)
+# define V3D_CTRUN BIT(5)
+# define V3D_CTSUBS BIT(4)
+# define V3D_CTERR BIT(3)
+# define V3D_CTMODE BIT(0)
+
+#define V3D_CT0EA 0x00108
+#define V3D_CT1EA 0x0010c
+#define V3D_CTNEA(n) (V3D_CT0EA + 4 * (n))
+#define V3D_CT0CA 0x00110
+#define V3D_CT1CA 0x00114
+#define V3D_CTNCA(n) (V3D_CT0CA + 4 * (n))
+#define V3D_CT00RA0 0x00118
+#define V3D_CT01RA0 0x0011c
+#define V3D_CTNRA0(n) (V3D_CT00RA0 + 4 * (n))
+#define V3D_CT0LC 0x00120
+#define V3D_CT1LC 0x00124
+#define V3D_CTNLC(n) (V3D_CT0LC + 4 * (n))
+#define V3D_CT0PC 0x00128
+#define V3D_CT1PC 0x0012c
+#define V3D_CTNPC(n) (V3D_CT0PC + 4 * (n))
+
+#define V3D_PCS 0x00130
+# define V3D_BMOOM BIT(8)
+# define V3D_RMBUSY BIT(3)
+# define V3D_RMACTIVE BIT(2)
+# define V3D_BMBUSY BIT(1)
+# define V3D_BMACTIVE BIT(0)
+
+#define V3D_BFC 0x00134
+#define V3D_RFC 0x00138
+#define V3D_BPCA 0x00300
+#define V3D_BPCS 0x00304
+#define V3D_BPOA 0x00308
+#define V3D_BPOS 0x0030c
+#define V3D_BXCF 0x00310
+#define V3D_SQRSV0 0x00410
+#define V3D_SQRSV1 0x00414
+#define V3D_SQCNTL 0x00418
+#define V3D_SRQPC 0x00430
+#define V3D_SRQUA 0x00434
+#define V3D_SRQUL 0x00438
+#define V3D_SRQCS 0x0043c
+#define V3D_VPACNTL 0x00500
+#define V3D_VPMBASE 0x00504
+#define V3D_PCTRC 0x00670
+#define V3D_PCTRE 0x00674
+# define V3D_PCTRE_EN BIT(31)
+#define V3D_PCTR(x) (0x00680 + ((x) * 8))
+#define V3D_PCTRS(x) (0x00684 + ((x) * 8))
+#define V3D_DBGE 0x00f00
+#define V3D_FDBGO 0x00f04
+#define V3D_FDBGB 0x00f08
+#define V3D_FDBGR 0x00f0c
+#define V3D_FDBGS 0x00f10
+#define V3D_ERRSTAT 0x00f20
+
+#define PV_CONTROL 0x00
+# define PV_CONTROL_FORMAT_MASK VC4_MASK(23, 21)
+# define PV_CONTROL_FORMAT_SHIFT 21
+# define PV_CONTROL_FORMAT_24 0
+# define PV_CONTROL_FORMAT_DSIV_16 1
+# define PV_CONTROL_FORMAT_DSIC_16 2
+# define PV_CONTROL_FORMAT_DSIV_18 3
+# define PV_CONTROL_FORMAT_DSIV_24 4
+
+# define PV_CONTROL_FIFO_LEVEL_MASK VC4_MASK(20, 15)
+# define PV_CONTROL_FIFO_LEVEL_SHIFT 15
+# define PV_CONTROL_CLR_AT_START BIT(14)
+# define PV_CONTROL_TRIGGER_UNDERFLOW BIT(13)
+# define PV_CONTROL_WAIT_HSTART BIT(12)
+# define PV_CONTROL_PIXEL_REP_MASK VC4_MASK(5, 4)
+# define PV_CONTROL_PIXEL_REP_SHIFT 4
+# define PV_CONTROL_CLK_SELECT_DSI 0
+# define PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI 1
+# define PV_CONTROL_CLK_SELECT_VEC 2
+# define PV_CONTROL_CLK_SELECT_MASK VC4_MASK(3, 2)
+# define PV_CONTROL_CLK_SELECT_SHIFT 2
+# define PV_CONTROL_FIFO_CLR BIT(1)
+# define PV_CONTROL_EN BIT(0)
+
+#define PV_V_CONTROL 0x04
+# define PV_VCONTROL_ODD_DELAY_MASK VC4_MASK(22, 6)
+# define PV_VCONTROL_ODD_DELAY_SHIFT 6
+# define PV_VCONTROL_ODD_FIRST BIT(5)
+# define PV_VCONTROL_INTERLACE BIT(4)
+# define PV_VCONTROL_DSI BIT(3)
+# define PV_VCONTROL_COMMAND BIT(2)
+# define PV_VCONTROL_CONTINUOUS BIT(1)
+# define PV_VCONTROL_VIDEN BIT(0)
+
+#define PV_VSYNCD_EVEN 0x08
+
+#define PV_HORZA 0x0c
+# define PV_HORZA_HBP_MASK VC4_MASK(31, 16)
+# define PV_HORZA_HBP_SHIFT 16
+# define PV_HORZA_HSYNC_MASK VC4_MASK(15, 0)
+# define PV_HORZA_HSYNC_SHIFT 0
+
+#define PV_HORZB 0x10
+# define PV_HORZB_HFP_MASK VC4_MASK(31, 16)
+# define PV_HORZB_HFP_SHIFT 16
+# define PV_HORZB_HACTIVE_MASK VC4_MASK(15, 0)
+# define PV_HORZB_HACTIVE_SHIFT 0
+
+#define PV_VERTA 0x14
+# define PV_VERTA_VBP_MASK VC4_MASK(31, 16)
+# define PV_VERTA_VBP_SHIFT 16
+# define PV_VERTA_VSYNC_MASK VC4_MASK(15, 0)
+# define PV_VERTA_VSYNC_SHIFT 0
+
+#define PV_VERTB 0x18
+# define PV_VERTB_VFP_MASK VC4_MASK(31, 16)
+# define PV_VERTB_VFP_SHIFT 16
+# define PV_VERTB_VACTIVE_MASK VC4_MASK(15, 0)
+# define PV_VERTB_VACTIVE_SHIFT 0
+
+#define PV_VERTA_EVEN 0x1c
+#define PV_VERTB_EVEN 0x20
+
+#define PV_INTEN 0x24
+#define PV_INTSTAT 0x28
+# define PV_INT_VID_IDLE BIT(9)
+# define PV_INT_VFP_END BIT(8)
+# define PV_INT_VFP_START BIT(7)
+# define PV_INT_VACT_START BIT(6)
+# define PV_INT_VBP_START BIT(5)
+# define PV_INT_VSYNC_START BIT(4)
+# define PV_INT_HFP_START BIT(3)
+# define PV_INT_HACT_START BIT(2)
+# define PV_INT_HBP_START BIT(1)
+# define PV_INT_HSYNC_START BIT(0)
+
+#define PV_STAT 0x2c
+
+#define PV_HACT_ACT 0x30
+
+#define SCALER_DISPCTRL 0x00000000
+/* Global register for clock gating the HVS */
+# define SCALER_DISPCTRL_ENABLE BIT(31)
+# define SCALER_DISPCTRL_DSP2EISLUR BIT(15)
+# define SCALER_DISPCTRL_DSP1EISLUR BIT(14)
+# define SCALER_DISPCTRL_DSP3_MUX_MASK VC4_MASK(19, 18)
+# define SCALER_DISPCTRL_DSP3_MUX_SHIFT 18
+
+/* Enables Display 0 short line and underrun contribution to
+ * SCALER_DISPSTAT_IRQDISP0. Note that short frame contributions are
+ * always enabled.
+ */
+# define SCALER_DISPCTRL_DSP0EISLUR BIT(13)
+# define SCALER_DISPCTRL_DSP2EIEOLN BIT(12)
+# define SCALER_DISPCTRL_DSP2EIEOF BIT(11)
+# define SCALER_DISPCTRL_DSP1EIEOLN BIT(10)
+# define SCALER_DISPCTRL_DSP1EIEOF BIT(9)
+/* Enables Display 0 end-of-line-N contribution to
+ * SCALER_DISPSTAT_IRQDISP0
+ */
+# define SCALER_DISPCTRL_DSP0EIEOLN BIT(8)
+/* Enables Display 0 EOF contribution to SCALER_DISPSTAT_IRQDISP0 */
+# define SCALER_DISPCTRL_DSP0EIEOF BIT(7)
+
+# define SCALER_DISPCTRL_SLVRDEIRQ BIT(6)
+# define SCALER_DISPCTRL_SLVWREIRQ BIT(5)
+# define SCALER_DISPCTRL_DMAEIRQ BIT(4)
+# define SCALER_DISPCTRL_DISP2EIRQ BIT(3)
+# define SCALER_DISPCTRL_DISP1EIRQ BIT(2)
+/* Enables interrupt generation on the enabled EOF/EOLN/EISLUR
+ * bits and short frames.
+ */
+# define SCALER_DISPCTRL_DISP0EIRQ BIT(1)
+/* Enables interrupt generation on scaler profiler interrupt. */
+# define SCALER_DISPCTRL_SCLEIRQ BIT(0)
+
+#define SCALER_DISPSTAT 0x00000004
+# define SCALER_DISPSTAT_COBLOW2 BIT(29)
+# define SCALER_DISPSTAT_EOLN2 BIT(28)
+# define SCALER_DISPSTAT_ESFRAME2 BIT(27)
+# define SCALER_DISPSTAT_ESLINE2 BIT(26)
+# define SCALER_DISPSTAT_EUFLOW2 BIT(25)
+# define SCALER_DISPSTAT_EOF2 BIT(24)
+
+# define SCALER_DISPSTAT_COBLOW1 BIT(21)
+# define SCALER_DISPSTAT_EOLN1 BIT(20)
+# define SCALER_DISPSTAT_ESFRAME1 BIT(19)
+# define SCALER_DISPSTAT_ESLINE1 BIT(18)
+# define SCALER_DISPSTAT_EUFLOW1 BIT(17)
+# define SCALER_DISPSTAT_EOF1 BIT(16)
+
+# define SCALER_DISPSTAT_RESP_MASK VC4_MASK(15, 14)
+# define SCALER_DISPSTAT_RESP_SHIFT 14
+# define SCALER_DISPSTAT_RESP_OKAY 0
+# define SCALER_DISPSTAT_RESP_EXOKAY 1
+# define SCALER_DISPSTAT_RESP_SLVERR 2
+# define SCALER_DISPSTAT_RESP_DECERR 3
+
+# define SCALER_DISPSTAT_COBLOW0 BIT(13)
+/* Set when the DISPEOLN line is done compositing. */
+# define SCALER_DISPSTAT_EOLN0 BIT(12)
+/* Set when VSTART is seen but there are still pixels in the current
+ * output line.
+ */
+# define SCALER_DISPSTAT_ESFRAME0 BIT(11)
+/* Set when HSTART is seen but there are still pixels in the current
+ * output line.
+ */
+# define SCALER_DISPSTAT_ESLINE0 BIT(10)
+/* Set when the downstream tries to read from the display FIFO
+ * while it's empty.
+ */
+# define SCALER_DISPSTAT_EUFLOW0 BIT(9)
+/* Set when the display mode changes from RUN to EOF */
+# define SCALER_DISPSTAT_EOF0 BIT(8)
+
+/* Set on AXI invalid DMA ID error. */
+# define SCALER_DISPSTAT_DMA_ERROR BIT(7)
+/* Set on AXI slave read decode error */
+# define SCALER_DISPSTAT_IRQSLVRD BIT(6)
+/* Set on AXI slave write decode error */
+# define SCALER_DISPSTAT_IRQSLVWR BIT(5)
+/* Set when SCALER_DISPSTAT_DMA_ERROR is set, or
+ * SCALER_DISPSTAT_RESP_ERROR is not SCALER_DISPSTAT_RESP_OKAY.
+ */
+# define SCALER_DISPSTAT_IRQDMA BIT(4)
+# define SCALER_DISPSTAT_IRQDISP2 BIT(3)
+# define SCALER_DISPSTAT_IRQDISP1 BIT(2)
+/* Set when any of the EOF/EOLN/ESFRAME/ESLINE bits are set and their
+ * corresponding interrupt bit is enabled in DISPCTRL.
+ */
+# define SCALER_DISPSTAT_IRQDISP0 BIT(1)
+/* On read, the profiler interrupt. On write, clear *all* interrupt bits. */
+# define SCALER_DISPSTAT_IRQSCL BIT(0)
+
+#define SCALER_DISPID 0x00000008
+#define SCALER_DISPECTRL 0x0000000c
+#define SCALER_DISPPROF 0x00000010
+#define SCALER_DISPDITHER 0x00000014
+#define SCALER_DISPEOLN 0x00000018
+#define SCALER_DISPLIST0 0x00000020
+#define SCALER_DISPLIST1 0x00000024
+#define SCALER_DISPLIST2 0x00000028
+#define SCALER_DISPLSTAT 0x0000002c
+#define SCALER_DISPLISTX(x) (SCALER_DISPLIST0 + \
+ (x) * (SCALER_DISPLIST1 - \
+ SCALER_DISPLIST0))
+
+#define SCALER_DISPLACT0 0x00000030
+#define SCALER_DISPLACT1 0x00000034
+#define SCALER_DISPLACT2 0x00000038
+#define SCALER_DISPLACTX(x) (SCALER_DISPLACT0 + \
+ (x) * (SCALER_DISPLACT1 - \
+ SCALER_DISPLACT0))
+
+#define SCALER_DISPCTRL0 0x00000040
+# define SCALER_DISPCTRLX_ENABLE BIT(31)
+# define SCALER_DISPCTRLX_RESET BIT(30)
+/* Generates a single frame when VSTART is seen and stops at the last
+ * pixel read from the FIFO.
+ */
+# define SCALER_DISPCTRLX_ONESHOT BIT(29)
+/* Processes a single context in the dlist and then task switch,
+ * instead of an entire line.
+ */
+# define SCALER_DISPCTRLX_ONECTX BIT(28)
+/* Set to have DISPSLAVE return 2 16bpp pixels and no status data. */
+# define SCALER_DISPCTRLX_FIFO32 BIT(27)
+/* Turns on output to the DISPSLAVE register instead of the normal
+ * FIFO.
+ */
+# define SCALER_DISPCTRLX_FIFOREG BIT(26)
+
+# define SCALER_DISPCTRLX_WIDTH_MASK VC4_MASK(23, 12)
+# define SCALER_DISPCTRLX_WIDTH_SHIFT 12
+# define SCALER_DISPCTRLX_HEIGHT_MASK VC4_MASK(11, 0)
+# define SCALER_DISPCTRLX_HEIGHT_SHIFT 0
+
+#define SCALER_DISPBKGND0 0x00000044
+# define SCALER_DISPBKGND_AUTOHS BIT(31)
+# define SCALER_DISPBKGND_INTERLACE BIT(30)
+# define SCALER_DISPBKGND_GAMMA BIT(29)
+# define SCALER_DISPBKGND_TESTMODE_MASK VC4_MASK(28, 25)
+# define SCALER_DISPBKGND_TESTMODE_SHIFT 25
+/* Enables filling the scaler line with the RGB value in the low 24
+ * bits before compositing. Costs cycles, so should be skipped if
+ * opaque display planes will cover everything.
+ */
+# define SCALER_DISPBKGND_FILL BIT(24)
+
+#define SCALER_DISPSTAT0 0x00000048
+# define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30)
+# define SCALER_DISPSTATX_MODE_SHIFT 30
+# define SCALER_DISPSTATX_MODE_DISABLED 0
+# define SCALER_DISPSTATX_MODE_INIT 1
+# define SCALER_DISPSTATX_MODE_RUN 2
+# define SCALER_DISPSTATX_MODE_EOF 3
+# define SCALER_DISPSTATX_FULL BIT(29)
+# define SCALER_DISPSTATX_EMPTY BIT(28)
+# define SCALER_DISPSTATX_FRAME_COUNT_MASK VC4_MASK(17, 12)
+# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT 12
+# define SCALER_DISPSTATX_LINE_MASK VC4_MASK(11, 0)
+# define SCALER_DISPSTATX_LINE_SHIFT 0
+
+#define SCALER_DISPBASE0 0x0000004c
+/* Last pixel in the COB (display FIFO memory) allocated to this HVS
+ * channel. Must be 4-pixel aligned (and thus 4 pixels less than the
+ * next COB base).
+ */
+# define SCALER_DISPBASEX_TOP_MASK VC4_MASK(31, 16)
+# define SCALER_DISPBASEX_TOP_SHIFT 16
+/* First pixel in the COB (display FIFO memory) allocated to this HVS
+ * channel. Must be 4-pixel aligned.
+ */
+# define SCALER_DISPBASEX_BASE_MASK VC4_MASK(15, 0)
+# define SCALER_DISPBASEX_BASE_SHIFT 0
+
+#define SCALER_DISPCTRL1 0x00000050
+#define SCALER_DISPBKGND1 0x00000054
+#define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \
+ (x) * (SCALER_DISPBKGND1 - \
+ SCALER_DISPBKGND0))
+#define SCALER_DISPSTAT1 0x00000058
+#define SCALER_DISPSTATX(x) (SCALER_DISPSTAT0 + \
+ (x) * (SCALER_DISPSTAT1 - \
+ SCALER_DISPSTAT0))
+#define SCALER_DISPBASE1 0x0000005c
+#define SCALER_DISPBASEX(x) (SCALER_DISPBASE0 + \
+ (x) * (SCALER_DISPBASE1 - \
+ SCALER_DISPBASE0))
+#define SCALER_DISPCTRL2 0x00000060
+#define SCALER_DISPCTRLX(x) (SCALER_DISPCTRL0 + \
+ (x) * (SCALER_DISPCTRL1 - \
+ SCALER_DISPCTRL0))
+#define SCALER_DISPBKGND2 0x00000064
+#define SCALER_DISPSTAT2 0x00000068
+#define SCALER_DISPBASE2 0x0000006c
+#define SCALER_DISPALPHA2 0x00000070
+#define SCALER_GAMADDR 0x00000078
+# define SCALER_GAMADDR_AUTOINC BIT(31)
+/* Enables all gamma ramp SRAMs, not just those of CRTCs with gamma
+ * enabled.
+ */
+# define SCALER_GAMADDR_SRAMENB BIT(30)
+
+#define SCALER_OLEDOFFS 0x00000080
+/* Clamps R to [16,235] and G/B to [16,240]. */
+# define SCALER_OLEDOFFS_YUVCLAMP BIT(31)
+
+/* Chooses which display FIFO the matrix applies to. */
+# define SCALER_OLEDOFFS_DISPFIFO_MASK VC4_MASK(25, 24)
+# define SCALER_OLEDOFFS_DISPFIFO_SHIFT 24
+# define SCALER_OLEDOFFS_DISPFIFO_DISABLED 0
+# define SCALER_OLEDOFFS_DISPFIFO_0 1
+# define SCALER_OLEDOFFS_DISPFIFO_1 2
+# define SCALER_OLEDOFFS_DISPFIFO_2 3
+
+/* Offsets are 8-bit 2s-complement. */
+# define SCALER_OLEDOFFS_RED_MASK VC4_MASK(23, 16)
+# define SCALER_OLEDOFFS_RED_SHIFT 16
+# define SCALER_OLEDOFFS_GREEN_MASK VC4_MASK(15, 8)
+# define SCALER_OLEDOFFS_GREEN_SHIFT 8
+# define SCALER_OLEDOFFS_BLUE_MASK VC4_MASK(7, 0)
+# define SCALER_OLEDOFFS_BLUE_SHIFT 0
+
+/* The coefficients are S0.9 fractions. */
+#define SCALER_OLEDCOEF0 0x00000084
+# define SCALER_OLEDCOEF0_B_TO_R_MASK VC4_MASK(29, 20)
+# define SCALER_OLEDCOEF0_B_TO_R_SHIFT 20
+# define SCALER_OLEDCOEF0_B_TO_G_MASK VC4_MASK(19, 10)
+# define SCALER_OLEDCOEF0_B_TO_G_SHIFT 10
+# define SCALER_OLEDCOEF0_B_TO_B_MASK VC4_MASK(9, 0)
+# define SCALER_OLEDCOEF0_B_TO_B_SHIFT 0
+
+#define SCALER_OLEDCOEF1 0x00000088
+# define SCALER_OLEDCOEF1_G_TO_R_MASK VC4_MASK(29, 20)
+# define SCALER_OLEDCOEF1_G_TO_R_SHIFT 20
+# define SCALER_OLEDCOEF1_G_TO_G_MASK VC4_MASK(19, 10)
+# define SCALER_OLEDCOEF1_G_TO_G_SHIFT 10
+# define SCALER_OLEDCOEF1_G_TO_B_MASK VC4_MASK(9, 0)
+# define SCALER_OLEDCOEF1_G_TO_B_SHIFT 0
+
+#define SCALER_OLEDCOEF2 0x0000008c
+# define SCALER_OLEDCOEF2_R_TO_R_MASK VC4_MASK(29, 20)
+# define SCALER_OLEDCOEF2_R_TO_R_SHIFT 20
+# define SCALER_OLEDCOEF2_R_TO_G_MASK VC4_MASK(19, 10)
+# define SCALER_OLEDCOEF2_R_TO_G_SHIFT 10
+# define SCALER_OLEDCOEF2_R_TO_B_MASK VC4_MASK(9, 0)
+# define SCALER_OLEDCOEF2_R_TO_B_SHIFT 0
+
+/* Slave addresses for DMAing from HVS composition output to other
+ * devices. The top bits are valid only in !FIFO32 mode.
+ */
+#define SCALER_DISPSLAVE0 0x000000c0
+#define SCALER_DISPSLAVE1 0x000000c8
+#define SCALER_DISPSLAVE2 0x000000d0
+# define SCALER_DISPSLAVE_ISSUE_VSTART BIT(31)
+# define SCALER_DISPSLAVE_ISSUE_HSTART BIT(30)
+/* Set when the current line has been read and an HSTART is required. */
+# define SCALER_DISPSLAVE_EOL BIT(26)
+/* Set when the display FIFO is empty. */
+# define SCALER_DISPSLAVE_EMPTY BIT(25)
+/* Set when there is RGB data ready to read. */
+# define SCALER_DISPSLAVE_VALID BIT(24)
+# define SCALER_DISPSLAVE_RGB_MASK VC4_MASK(23, 0)
+# define SCALER_DISPSLAVE_RGB_SHIFT 0
+
+#define SCALER_GAMDATA 0x000000e0
+#define SCALER_DLIST_START 0x00002000
+#define SCALER_DLIST_SIZE 0x00004000
+
+#define VC4_HDMI_CORE_REV 0x000
+
+#define VC4_HDMI_SW_RESET_CONTROL 0x004
+# define VC4_HDMI_SW_RESET_FORMAT_DETECT BIT(1)
+# define VC4_HDMI_SW_RESET_HDMI BIT(0)
+
+#define VC4_HDMI_HOTPLUG_INT 0x008
+
+#define VC4_HDMI_HOTPLUG 0x00c
+# define VC4_HDMI_HOTPLUG_CONNECTED BIT(0)
+
+/* 3 bits per field, where each field maps from that corresponding MAI
+ * bus channel to the given HDMI channel.
+ */
+#define VC4_HDMI_MAI_CHANNEL_MAP 0x090
+
+#define VC4_HDMI_MAI_CONFIG 0x094
+# define VC4_HDMI_MAI_CONFIG_FORMAT_REVERSE BIT(27)
+# define VC4_HDMI_MAI_CONFIG_BIT_REVERSE BIT(26)
+# define VC4_HDMI_MAI_CHANNEL_MASK_MASK VC4_MASK(15, 0)
+# define VC4_HDMI_MAI_CHANNEL_MASK_SHIFT 0
+
+/* Last received format word on the MAI bus. */
+#define VC4_HDMI_MAI_FORMAT 0x098
+
+#define VC4_HDMI_AUDIO_PACKET_CONFIG 0x09c
+# define VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_SAMPLE_FLAT BIT(29)
+# define VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_INACTIVE_CHANNELS BIT(24)
+# define VC4_HDMI_AUDIO_PACKET_FORCE_SAMPLE_PRESENT BIT(19)
+# define VC4_HDMI_AUDIO_PACKET_FORCE_B_FRAME BIT(18)
+# define VC4_HDMI_AUDIO_PACKET_B_FRAME_IDENTIFIER_MASK VC4_MASK(13, 10)
+# define VC4_HDMI_AUDIO_PACKET_B_FRAME_IDENTIFIER_SHIFT 10
+/* If set, then multichannel, otherwise 2 channel. */
+# define VC4_HDMI_AUDIO_PACKET_AUDIO_LAYOUT BIT(9)
+/* If set, then AUDIO_LAYOUT overrides audio_cea_mask */
+# define VC4_HDMI_AUDIO_PACKET_FORCE_AUDIO_LAYOUT BIT(8)
+# define VC4_HDMI_AUDIO_PACKET_CEA_MASK_MASK VC4_MASK(7, 0)
+# define VC4_HDMI_AUDIO_PACKET_CEA_MASK_SHIFT 0
+
+#define VC4_HDMI_RAM_PACKET_CONFIG 0x0a0
+# define VC4_HDMI_RAM_PACKET_ENABLE BIT(16)
+
+#define VC4_HDMI_RAM_PACKET_STATUS 0x0a4
+
+#define VC4_HDMI_CRP_CFG 0x0a8
+/* When set, the CTS_PERIOD counts based on MAI bus sync pulse instead
+ * of pixel clock.
+ */
+# define VC4_HDMI_CRP_USE_MAI_BUS_SYNC_FOR_CTS BIT(26)
+/* When set, no CRP packets will be sent. */
+# define VC4_HDMI_CRP_CFG_DISABLE BIT(25)
+/* If set, generates CTS values based on N, audio clock, and video
+ * clock. N must be divisible by 128.
+ */
+# define VC4_HDMI_CRP_CFG_EXTERNAL_CTS_EN BIT(24)
+# define VC4_HDMI_CRP_CFG_N_MASK VC4_MASK(19, 0)
+# define VC4_HDMI_CRP_CFG_N_SHIFT 0
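+
+/* Worked example (typical values assumed, not taken from this driver): for
+ * 48 kHz audio the HDMI spec's recommended N is 6144, and with a 74.25 MHz
+ * pixel clock the expected CTS is 74250000 * 6144 / (128 * 48000) = 74250.
+ */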
+
+/* 20-bit fields containing CTS values to be transmitted if !EXTERNAL_CTS_EN */
+#define VC4_HDMI_CTS_0 0x0ac
+#define VC4_HDMI_CTS_1 0x0b0
+/* 20-bit fields containing number of clocks to send CTS0/1 before
+ * switching to the other one.
+ */
+#define VC4_HDMI_CTS_PERIOD_0 0x0b4
+#define VC4_HDMI_CTS_PERIOD_1 0x0b8
+
+#define VC4_HDMI_HORZA 0x0c4
+# define VC4_HDMI_HORZA_VPOS BIT(14)
+# define VC4_HDMI_HORZA_HPOS BIT(13)
+/* Horizontal active pixels (hdisplay). */
+# define VC4_HDMI_HORZA_HAP_MASK VC4_MASK(12, 0)
+# define VC4_HDMI_HORZA_HAP_SHIFT 0
+
+#define VC4_HDMI_HORZB 0x0c8
+/* Horizontal back porch (htotal - hsync_end). */
+# define VC4_HDMI_HORZB_HBP_MASK VC4_MASK(29, 20)
+# define VC4_HDMI_HORZB_HBP_SHIFT 20
+/* Horizontal sync pulse (hsync_end - hsync_start). */
+# define VC4_HDMI_HORZB_HSP_MASK VC4_MASK(19, 10)
+# define VC4_HDMI_HORZB_HSP_SHIFT 10
+/* Horizontal front porch (hsync_start - hdisplay). */
+# define VC4_HDMI_HORZB_HFP_MASK VC4_MASK(9, 0)
+# define VC4_HDMI_HORZB_HFP_SHIFT 0
+
+#define VC4_HDMI_FIFO_CTL 0x05c
+# define VC4_HDMI_FIFO_CTL_RECENTER_DONE BIT(14)
+# define VC4_HDMI_FIFO_CTL_USE_EMPTY BIT(13)
+# define VC4_HDMI_FIFO_CTL_ON_VB BIT(7)
+# define VC4_HDMI_FIFO_CTL_RECENTER BIT(6)
+# define VC4_HDMI_FIFO_CTL_FIFO_RESET BIT(5)
+# define VC4_HDMI_FIFO_CTL_USE_PLL_LOCK BIT(4)
+# define VC4_HDMI_FIFO_CTL_INV_CLK_XFR BIT(3)
+# define VC4_HDMI_FIFO_CTL_CAPTURE_PTR BIT(2)
+# define VC4_HDMI_FIFO_CTL_USE_FULL BIT(1)
+# define VC4_HDMI_FIFO_CTL_MASTER_SLAVE_N BIT(0)
+# define VC4_HDMI_FIFO_VALID_WRITE_MASK 0xefff
+
+#define VC4_HDMI_SCHEDULER_CONTROL 0x0c0
+# define VC4_HDMI_SCHEDULER_CONTROL_MANUAL_FORMAT BIT(15)
+# define VC4_HDMI_SCHEDULER_CONTROL_IGNORE_VSYNC_PREDICTS BIT(5)
+# define VC4_HDMI_SCHEDULER_CONTROL_VERT_ALWAYS_KEEPOUT BIT(3)
+# define VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE BIT(1)
+# define VC4_HDMI_SCHEDULER_CONTROL_MODE_HDMI BIT(0)
+
+#define VC4_HDMI_VERTA0 0x0cc
+#define VC4_HDMI_VERTA1 0x0d4
+/* Vertical sync pulse (vsync_end - vsync_start). */
+# define VC4_HDMI_VERTA_VSP_MASK VC4_MASK(24, 20)
+# define VC4_HDMI_VERTA_VSP_SHIFT 20
+/* Vertical front porch (vsync_start - vdisplay). */
+# define VC4_HDMI_VERTA_VFP_MASK VC4_MASK(19, 13)
+# define VC4_HDMI_VERTA_VFP_SHIFT 13
+/* Vertical active lines (vdisplay). */
+# define VC4_HDMI_VERTA_VAL_MASK VC4_MASK(12, 0)
+# define VC4_HDMI_VERTA_VAL_SHIFT 0
+
+#define VC4_HDMI_VERTB0 0x0d0
+#define VC4_HDMI_VERTB1 0x0d8
+/* Vertical sync pulse offset (for interlaced) */
+# define VC4_HDMI_VERTB_VSPO_MASK VC4_MASK(21, 9)
+# define VC4_HDMI_VERTB_VSPO_SHIFT 9
+/* Vertical back porch (vtotal - vsync_end). */
+# define VC4_HDMI_VERTB_VBP_MASK VC4_MASK(8, 0)
+# define VC4_HDMI_VERTB_VBP_SHIFT 0
+
+#define VC4_HDMI_CEC_CNTRL_1 0x0e8
+/* Set when the transmission has ended. */
+# define VC4_HDMI_CEC_TX_EOM BIT(31)
+/* If set, transmission was acked on the 1st or 2nd attempt (only one
+ * retry is attempted). If in continuous mode, this means TX needs to
+ * be filled if !TX_EOM.
+ */
+# define VC4_HDMI_CEC_TX_STATUS_GOOD BIT(30)
+# define VC4_HDMI_CEC_RX_EOM BIT(29)
+# define VC4_HDMI_CEC_RX_STATUS_GOOD BIT(28)
+/* Number of bytes received for the message. */
+# define VC4_HDMI_CEC_REC_WRD_CNT_MASK VC4_MASK(27, 24)
+# define VC4_HDMI_CEC_REC_WRD_CNT_SHIFT 24
+/* Sets continuous receive mode. Generates interrupt after each 8
+ * bytes to signal that RX_DATA should be consumed, and at RX_EOM.
+ *
+ * If disabled, maximum 16 bytes will be received (including header),
+ * and interrupt at RX_EOM. Later bytes will be acked but not put
+ * into the RX_DATA.
+ */
+# define VC4_HDMI_CEC_RX_CONTINUE BIT(23)
+# define VC4_HDMI_CEC_TX_CONTINUE BIT(22)
+/* Set this after a CEC interrupt. */
+# define VC4_HDMI_CEC_CLEAR_RECEIVE_OFF BIT(21)
+/* Starts a TX. Will wait for appropriate idle time before CEC
+ * activity. Must be cleared in between transmits.
+ */
+# define VC4_HDMI_CEC_START_XMIT_BEGIN BIT(20)
+# define VC4_HDMI_CEC_MESSAGE_LENGTH_MASK VC4_MASK(19, 16)
+# define VC4_HDMI_CEC_MESSAGE_LENGTH_SHIFT 16
+/* Device's CEC address */
+# define VC4_HDMI_CEC_ADDR_MASK VC4_MASK(15, 12)
+# define VC4_HDMI_CEC_ADDR_SHIFT 12
+/* Divides off of HSM clock to generate CEC bit clock. */
+/* With the current defaults the CEC bit clock is 40 kHz = 25 usec */
+# define VC4_HDMI_CEC_DIV_CLK_CNT_MASK VC4_MASK(11, 0)
+# define VC4_HDMI_CEC_DIV_CLK_CNT_SHIFT 0
+
+/* Set these fields to the number of bit clock cycles it takes to reach the
+ * given number of microseconds.
+ */
+#define VC4_HDMI_CEC_CNTRL_2 0x0ec
+# define VC4_HDMI_CEC_CNT_TO_1500_US_MASK VC4_MASK(30, 24)
+# define VC4_HDMI_CEC_CNT_TO_1500_US_SHIFT 24
+# define VC4_HDMI_CEC_CNT_TO_1300_US_MASK VC4_MASK(23, 17)
+# define VC4_HDMI_CEC_CNT_TO_1300_US_SHIFT 17
+# define VC4_HDMI_CEC_CNT_TO_800_US_MASK VC4_MASK(16, 11)
+# define VC4_HDMI_CEC_CNT_TO_800_US_SHIFT 11
+# define VC4_HDMI_CEC_CNT_TO_600_US_MASK VC4_MASK(10, 5)
+# define VC4_HDMI_CEC_CNT_TO_600_US_SHIFT 5
+# define VC4_HDMI_CEC_CNT_TO_400_US_MASK VC4_MASK(4, 0)
+# define VC4_HDMI_CEC_CNT_TO_400_US_SHIFT 0
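+
+/* Worked example (assuming the default 40 kHz CEC bit clock noted above,
+ * i.e. 25 usec per cycle): CNT_TO_400_US would be 400 / 25 = 16 and
+ * CNT_TO_1500_US would be 1500 / 25 = 60.
+ */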
+
+#define VC4_HDMI_CEC_CNTRL_3 0x0f0
+# define VC4_HDMI_CEC_CNT_TO_2750_US_MASK VC4_MASK(31, 24)
+# define VC4_HDMI_CEC_CNT_TO_2750_US_SHIFT 24
+# define VC4_HDMI_CEC_CNT_TO_2400_US_MASK VC4_MASK(23, 16)
+# define VC4_HDMI_CEC_CNT_TO_2400_US_SHIFT 16
+# define VC4_HDMI_CEC_CNT_TO_2050_US_MASK VC4_MASK(15, 8)
+# define VC4_HDMI_CEC_CNT_TO_2050_US_SHIFT 8
+# define VC4_HDMI_CEC_CNT_TO_1700_US_MASK VC4_MASK(7, 0)
+# define VC4_HDMI_CEC_CNT_TO_1700_US_SHIFT 0
+
+#define VC4_HDMI_CEC_CNTRL_4 0x0f4
+# define VC4_HDMI_CEC_CNT_TO_4300_US_MASK VC4_MASK(31, 24)
+# define VC4_HDMI_CEC_CNT_TO_4300_US_SHIFT 24
+# define VC4_HDMI_CEC_CNT_TO_3900_US_MASK VC4_MASK(23, 16)
+# define VC4_HDMI_CEC_CNT_TO_3900_US_SHIFT 16
+# define VC4_HDMI_CEC_CNT_TO_3600_US_MASK VC4_MASK(15, 8)
+# define VC4_HDMI_CEC_CNT_TO_3600_US_SHIFT 8
+# define VC4_HDMI_CEC_CNT_TO_3500_US_MASK VC4_MASK(7, 0)
+# define VC4_HDMI_CEC_CNT_TO_3500_US_SHIFT 0
+
+#define VC4_HDMI_CEC_CNTRL_5 0x0f8
+# define VC4_HDMI_CEC_TX_SW_RESET BIT(27)
+# define VC4_HDMI_CEC_RX_SW_RESET BIT(26)
+# define VC4_HDMI_CEC_PAD_SW_RESET BIT(25)
+# define VC4_HDMI_CEC_MUX_TP_OUT_CEC BIT(24)
+# define VC4_HDMI_CEC_RX_CEC_INT BIT(23)
+# define VC4_HDMI_CEC_CLK_PRELOAD_MASK VC4_MASK(22, 16)
+# define VC4_HDMI_CEC_CLK_PRELOAD_SHIFT 16
+# define VC4_HDMI_CEC_CNT_TO_4700_US_MASK VC4_MASK(15, 8)
+# define VC4_HDMI_CEC_CNT_TO_4700_US_SHIFT 8
+# define VC4_HDMI_CEC_CNT_TO_4500_US_MASK VC4_MASK(7, 0)
+# define VC4_HDMI_CEC_CNT_TO_4500_US_SHIFT 0
+
+/* Transmit data, first byte is low byte of the 32-bit reg. MSB of
+ * each byte transmitted first.
+ */
+#define VC4_HDMI_CEC_TX_DATA_1 0x0fc
+#define VC4_HDMI_CEC_TX_DATA_2 0x100
+#define VC4_HDMI_CEC_TX_DATA_3 0x104
+#define VC4_HDMI_CEC_TX_DATA_4 0x108
+#define VC4_HDMI_CEC_RX_DATA_1 0x10c
+#define VC4_HDMI_CEC_RX_DATA_2 0x110
+#define VC4_HDMI_CEC_RX_DATA_3 0x114
+#define VC4_HDMI_CEC_RX_DATA_4 0x118
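+
+/* Layout example (illustrative): a 5-byte CEC message occupies bytes 0-3 of
+ * TX_DATA_1, with the header in the low byte, plus byte 0 of TX_DATA_2, and
+ * MESSAGE_LENGTH in CNTRL_1 is set accordingly.
+ */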
+
+#define VC4_HDMI_TX_PHY_RESET_CTL 0x2c0
+
+#define VC4_HDMI_TX_PHY_CTL0 0x2c4
+# define VC4_HDMI_TX_PHY_RNG_PWRDN BIT(25)
+
+/* Interrupt status bits */
+#define VC4_HDMI_CPU_STATUS 0x340
+#define VC4_HDMI_CPU_SET 0x344
+#define VC4_HDMI_CPU_CLEAR 0x348
+# define VC4_HDMI_CPU_CEC BIT(6)
+# define VC4_HDMI_CPU_HOTPLUG BIT(0)
+
+#define VC4_HDMI_CPU_MASK_STATUS 0x34c
+#define VC4_HDMI_CPU_MASK_SET 0x350
+#define VC4_HDMI_CPU_MASK_CLEAR 0x354
+
+#define VC4_HDMI_GCP(x) (0x400 + ((x) * 0x4))
+#define VC4_HDMI_RAM_PACKET(x) (0x400 + ((x) * 0x24))
+#define VC4_HDMI_PACKET_STRIDE 0x24
+
+#define VC4_HD_M_CTL 0x00c
+/* Debug: Current receive value on the CEC pad. */
+# define VC4_HD_CECRXD BIT(9)
+/* Debug: Override CEC output to 0. */
+# define VC4_HD_CECOVR BIT(8)
+# define VC4_HD_M_REGISTER_FILE_STANDBY (3 << 6)
+# define VC4_HD_M_RAM_STANDBY (3 << 4)
+# define VC4_HD_M_SW_RST BIT(2)
+# define VC4_HD_M_ENABLE BIT(0)
+
+#define VC4_HD_MAI_CTL 0x014
+/* Set when audio stream is received at a slower rate than the
+ * sampling period, so MAI fifo goes empty. Write 1 to clear.
+ */
+# define VC4_HD_MAI_CTL_DLATE BIT(15)
+# define VC4_HD_MAI_CTL_BUSY BIT(14)
+# define VC4_HD_MAI_CTL_CHALIGN BIT(13)
+# define VC4_HD_MAI_CTL_WHOLSMP BIT(12)
+# define VC4_HD_MAI_CTL_FULL BIT(11)
+# define VC4_HD_MAI_CTL_EMPTY BIT(10)
+# define VC4_HD_MAI_CTL_FLUSH BIT(9)
+/* If set, MAI bus generates SPDIF (bit 31) parity instead of passing
+ * through.
+ */
+# define VC4_HD_MAI_CTL_PAREN BIT(8)
+# define VC4_HD_MAI_CTL_CHNUM_MASK VC4_MASK(7, 4)
+# define VC4_HD_MAI_CTL_CHNUM_SHIFT 4
+# define VC4_HD_MAI_CTL_ENABLE BIT(3)
+/* Underflow error status bit, write 1 to clear. */
+# define VC4_HD_MAI_CTL_ERRORE BIT(2)
+/* Overflow error status bit, write 1 to clear. */
+# define VC4_HD_MAI_CTL_ERRORF BIT(1)
+/* Single-shot reset bit. Read value is undefined. */
+# define VC4_HD_MAI_CTL_RESET BIT(0)
+
+#define VC4_HD_MAI_THR 0x018
+# define VC4_HD_MAI_THR_PANICHIGH_MASK VC4_MASK(29, 24)
+# define VC4_HD_MAI_THR_PANICHIGH_SHIFT 24
+# define VC4_HD_MAI_THR_PANICLOW_MASK VC4_MASK(21, 16)
+# define VC4_HD_MAI_THR_PANICLOW_SHIFT 16
+# define VC4_HD_MAI_THR_DREQHIGH_MASK VC4_MASK(13, 8)
+# define VC4_HD_MAI_THR_DREQHIGH_SHIFT 8
+# define VC4_HD_MAI_THR_DREQLOW_MASK VC4_MASK(5, 0)
+# define VC4_HD_MAI_THR_DREQLOW_SHIFT 0
+
+/* Format header to be placed on the MAI data. Unused. */
+#define VC4_HD_MAI_FMT 0x01c
+
+/* Register for DMAing in audio data to be transported over the MAI
+ * bus to the Falcon core.
+ */
+#define VC4_HD_MAI_DATA 0x020
+
+/* Divider from HDMI HSM clock to MAI serial clock. Sampling period
+ * converges to N / (M + 1) cycles.
+ */
+#define VC4_HD_MAI_SMP 0x02c
+# define VC4_HD_MAI_SMP_N_MASK VC4_MASK(31, 8)
+# define VC4_HD_MAI_SMP_N_SHIFT 8
+# define VC4_HD_MAI_SMP_M_MASK VC4_MASK(7, 0)
+# define VC4_HD_MAI_SMP_M_SHIFT 0
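+
+/* Worked example (hypothetical clock rates, not taken from this driver):
+ * with a 108 MHz HSM clock and 48 kHz audio, N / (M + 1) would need to be
+ * 108000000 / 48000 = 2250.
+ */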
+
+#define VC4_HD_VID_CTL 0x038
+# define VC4_HD_VID_CTL_ENABLE BIT(31)
+# define VC4_HD_VID_CTL_UNDERFLOW_ENABLE BIT(30)
+# define VC4_HD_VID_CTL_FRAME_COUNTER_RESET BIT(29)
+# define VC4_HD_VID_CTL_VSYNC_LOW BIT(28)
+# define VC4_HD_VID_CTL_HSYNC_LOW BIT(27)
+
+#define VC4_HD_CSC_CTL 0x040
+# define VC4_HD_CSC_CTL_ORDER_MASK VC4_MASK(7, 5)
+# define VC4_HD_CSC_CTL_ORDER_SHIFT 5
+# define VC4_HD_CSC_CTL_ORDER_RGB 0
+# define VC4_HD_CSC_CTL_ORDER_BGR 1
+# define VC4_HD_CSC_CTL_ORDER_BRG 2
+# define VC4_HD_CSC_CTL_ORDER_GRB 3
+# define VC4_HD_CSC_CTL_ORDER_GBR 4
+# define VC4_HD_CSC_CTL_ORDER_RBG 5
+# define VC4_HD_CSC_CTL_PADMSB BIT(4)
+# define VC4_HD_CSC_CTL_MODE_MASK VC4_MASK(3, 2)
+# define VC4_HD_CSC_CTL_MODE_SHIFT 2
+# define VC4_HD_CSC_CTL_MODE_RGB_TO_SD_YPRPB 0
+# define VC4_HD_CSC_CTL_MODE_RGB_TO_HD_YPRPB 1
+# define VC4_HD_CSC_CTL_MODE_CUSTOM 3
+# define VC4_HD_CSC_CTL_RGB2YCC BIT(1)
+# define VC4_HD_CSC_CTL_ENABLE BIT(0)
+
+#define VC4_HD_CSC_12_11 0x044
+#define VC4_HD_CSC_14_13 0x048
+#define VC4_HD_CSC_22_21 0x04c
+#define VC4_HD_CSC_24_23 0x050
+#define VC4_HD_CSC_32_31 0x054
+#define VC4_HD_CSC_34_33 0x058
+
+#define VC4_HD_FRAME_COUNT 0x068
+
+/* HVS display list information. */
+#define HVS_BOOTLOADER_DLIST_END 32
+
+enum hvs_pixel_format {
+ /* 8bpp */
+ HVS_PIXEL_FORMAT_RGB332 = 0,
+ /* 16bpp */
+ HVS_PIXEL_FORMAT_RGBA4444 = 1,
+ HVS_PIXEL_FORMAT_RGB555 = 2,
+ HVS_PIXEL_FORMAT_RGBA5551 = 3,
+ HVS_PIXEL_FORMAT_RGB565 = 4,
+ /* 24bpp */
+ HVS_PIXEL_FORMAT_RGB888 = 5,
+ HVS_PIXEL_FORMAT_RGBA6666 = 6,
+ /* 32bpp */
+ HVS_PIXEL_FORMAT_RGBA8888 = 7,
+
+ HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE = 8,
+ HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9,
+ HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10,
+ HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11,
+ HVS_PIXEL_FORMAT_H264 = 12,
+ HVS_PIXEL_FORMAT_PALETTE = 13,
+ HVS_PIXEL_FORMAT_YUV444_RGB = 14,
+ HVS_PIXEL_FORMAT_AYUV444_RGB = 15,
+};
+
+/* Note: the LSB is the rightmost character shown. Only valid for
+ * HVS_PIXEL_FORMAT_RGBA8888, not RGB888.
+ */
+#define HVS_PIXEL_ORDER_RGBA 0
+#define HVS_PIXEL_ORDER_BGRA 1
+#define HVS_PIXEL_ORDER_ARGB 2
+#define HVS_PIXEL_ORDER_ABGR 3
+
+#define HVS_PIXEL_ORDER_XBRG 0
+#define HVS_PIXEL_ORDER_XRBG 1
+#define HVS_PIXEL_ORDER_XRGB 2
+#define HVS_PIXEL_ORDER_XBGR 3
+
+#define HVS_PIXEL_ORDER_XYCBCR 0
+#define HVS_PIXEL_ORDER_XYCRCB 1
+#define HVS_PIXEL_ORDER_YXCBCR 2
+#define HVS_PIXEL_ORDER_YXCRCB 3
+
+#define SCALER_CTL0_END BIT(31)
+#define SCALER_CTL0_VALID BIT(30)
+
+#define SCALER_CTL0_SIZE_MASK VC4_MASK(29, 24)
+#define SCALER_CTL0_SIZE_SHIFT 24
+
+#define SCALER_CTL0_TILING_MASK VC4_MASK(21, 20)
+#define SCALER_CTL0_TILING_SHIFT 20
+#define SCALER_CTL0_TILING_LINEAR 0
+#define SCALER_CTL0_TILING_64B 1
+#define SCALER_CTL0_TILING_128B 2
+#define SCALER_CTL0_TILING_256B_OR_T 3
+
+#define SCALER_CTL0_ALPHA_MASK BIT(19)
+#define SCALER_CTL0_HFLIP BIT(16)
+#define SCALER_CTL0_VFLIP BIT(15)
+
+#define SCALER_CTL0_KEY_MODE_MASK VC4_MASK(18, 17)
+#define SCALER_CTL0_KEY_MODE_SHIFT 17
+#define SCALER_CTL0_KEY_DISABLED 0
+#define SCALER_CTL0_KEY_LUMA_OR_COMMON_RGB 1
+#define SCALER_CTL0_KEY_MATCH 2 /* turn transparent */
+#define SCALER_CTL0_KEY_REPLACE 3 /* replace with value from key mask word 2 */
+
+#define SCALER_CTL0_ORDER_MASK VC4_MASK(14, 13)
+#define SCALER_CTL0_ORDER_SHIFT 13
+
+#define SCALER_CTL0_RGBA_EXPAND_MASK VC4_MASK(12, 11)
+#define SCALER_CTL0_RGBA_EXPAND_SHIFT 11
+#define SCALER_CTL0_RGBA_EXPAND_ZERO 0
+#define SCALER_CTL0_RGBA_EXPAND_LSB 1
+#define SCALER_CTL0_RGBA_EXPAND_MSB 2
+#define SCALER_CTL0_RGBA_EXPAND_ROUND 3
+
+#define SCALER_CTL0_SCL1_MASK VC4_MASK(10, 8)
+#define SCALER_CTL0_SCL1_SHIFT 8
+
+#define SCALER_CTL0_SCL0_MASK VC4_MASK(7, 5)
+#define SCALER_CTL0_SCL0_SHIFT 5
+
+#define SCALER_CTL0_SCL_H_PPF_V_PPF 0
+#define SCALER_CTL0_SCL_H_TPZ_V_PPF 1
+#define SCALER_CTL0_SCL_H_PPF_V_TPZ 2
+#define SCALER_CTL0_SCL_H_TPZ_V_TPZ 3
+#define SCALER_CTL0_SCL_H_PPF_V_NONE 4
+#define SCALER_CTL0_SCL_H_NONE_V_PPF 5
+#define SCALER_CTL0_SCL_H_NONE_V_TPZ 6
+#define SCALER_CTL0_SCL_H_TPZ_V_NONE 7
+
+/* Set to indicate no scaling. */
+#define SCALER_CTL0_UNITY BIT(4)
+
+#define SCALER_CTL0_PIXEL_FORMAT_MASK VC4_MASK(3, 0)
+#define SCALER_CTL0_PIXEL_FORMAT_SHIFT 0
+
+#define SCALER_POS0_FIXED_ALPHA_MASK VC4_MASK(31, 24)
+#define SCALER_POS0_FIXED_ALPHA_SHIFT 24
+
+#define SCALER_POS0_START_Y_MASK VC4_MASK(23, 12)
+#define SCALER_POS0_START_Y_SHIFT 12
+
+#define SCALER_POS0_START_X_MASK VC4_MASK(11, 0)
+#define SCALER_POS0_START_X_SHIFT 0
+
+#define SCALER_POS1_SCL_HEIGHT_MASK VC4_MASK(27, 16)
+#define SCALER_POS1_SCL_HEIGHT_SHIFT 16
+
+#define SCALER_POS1_SCL_WIDTH_MASK VC4_MASK(11, 0)
+#define SCALER_POS1_SCL_WIDTH_SHIFT 0
+
+#define SCALER_POS2_ALPHA_MODE_MASK VC4_MASK(31, 30)
+#define SCALER_POS2_ALPHA_MODE_SHIFT 30
+#define SCALER_POS2_ALPHA_MODE_PIPELINE 0
+#define SCALER_POS2_ALPHA_MODE_FIXED 1
+#define SCALER_POS2_ALPHA_MODE_FIXED_NONZERO 2
+#define SCALER_POS2_ALPHA_MODE_FIXED_OVER_0x07 3
+#define SCALER_POS2_ALPHA_PREMULT BIT(29)
+#define SCALER_POS2_ALPHA_MIX BIT(28)
+
+#define SCALER_POS2_HEIGHT_MASK VC4_MASK(27, 16)
+#define SCALER_POS2_HEIGHT_SHIFT 16
+
+#define SCALER_POS2_WIDTH_MASK VC4_MASK(11, 0)
+#define SCALER_POS2_WIDTH_SHIFT 0
+
+/* Color Space Conversion words. Some values are S2.8 signed
+ * integers, except that the 2 integer bits map as {0x0: 0, 0x1: 1,
+ * 0x2: 2, 0x3: -1}
+ */
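+/* For example (illustrative only), an S2.8 coefficient of 0x104 decodes to
+ * 1 + 0x04 / 256 = 1.015625, while 0x380 decodes to -1 + 0x80 / 256 = -0.5.
+ */
+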
+/* bottom 8 bits of S2.8 contribution of Cr to Blue */
+#define SCALER_CSC0_COEF_CR_BLU_MASK VC4_MASK(31, 24)
+#define SCALER_CSC0_COEF_CR_BLU_SHIFT 24
+/* Signed offset to apply to Y before CSC. (Y' = Y + YY_OFS) */
+#define SCALER_CSC0_COEF_YY_OFS_MASK VC4_MASK(23, 16)
+#define SCALER_CSC0_COEF_YY_OFS_SHIFT 16
+/* Signed offset to apply to CB before CSC (Cb' = Cb - 128 + CB_OFS). */
+#define SCALER_CSC0_COEF_CB_OFS_MASK VC4_MASK(15, 8)
+#define SCALER_CSC0_COEF_CB_OFS_SHIFT 8
+/* Signed offset to apply to CR before CSC (Cr' = Cr - 128 + CR_OFS). */
+#define SCALER_CSC0_COEF_CR_OFS_MASK VC4_MASK(7, 0)
+#define SCALER_CSC0_COEF_CR_OFS_SHIFT 0
+#define SCALER_CSC0_ITR_R_601_5 0x00f00000
+#define SCALER_CSC0_ITR_R_709_3 0x00f00000
+#define SCALER_CSC0_JPEG_JFIF 0x00000000
+
+/* S2.8 contribution of Cb to Green */
+#define SCALER_CSC1_COEF_CB_GRN_MASK VC4_MASK(31, 22)
+#define SCALER_CSC1_COEF_CB_GRN_SHIFT 22
+/* S2.8 contribution of Cr to Green */
+#define SCALER_CSC1_COEF_CR_GRN_MASK VC4_MASK(21, 12)
+#define SCALER_CSC1_COEF_CR_GRN_SHIFT 12
+/* S2.8 contribution of Y to all of RGB */
+#define SCALER_CSC1_COEF_YY_ALL_MASK VC4_MASK(11, 2)
+#define SCALER_CSC1_COEF_YY_ALL_SHIFT 2
+/* top 2 bits of S2.8 contribution of Cr to Blue */
+#define SCALER_CSC1_COEF_CR_BLU_MASK VC4_MASK(1, 0)
+#define SCALER_CSC1_COEF_CR_BLU_SHIFT 0
+#define SCALER_CSC1_ITR_R_601_5 0xe73304a8
+#define SCALER_CSC1_ITR_R_709_3 0xf2b784a8
+#define SCALER_CSC1_JPEG_JFIF 0xea34a400
+
+/* S2.8 contribution of Cb to Red */
+#define SCALER_CSC2_COEF_CB_RED_MASK VC4_MASK(29, 20)
+#define SCALER_CSC2_COEF_CB_RED_SHIFT 20
+/* S2.8 contribution of Cr to Red */
+#define SCALER_CSC2_COEF_CR_RED_MASK VC4_MASK(19, 10)
+#define SCALER_CSC2_COEF_CR_RED_SHIFT 10
+/* S2.8 contribution of Cb to Blue */
+#define SCALER_CSC2_COEF_CB_BLU_MASK VC4_MASK(9, 0)
+#define SCALER_CSC2_COEF_CB_BLU_SHIFT 0
+#define SCALER_CSC2_ITR_R_601_5 0x00066204
+#define SCALER_CSC2_ITR_R_709_3 0x00072a1c
+#define SCALER_CSC2_JPEG_JFIF 0x000599c5
+
+#define SCALER_TPZ0_VERT_RECALC BIT(31)
+#define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8)
+#define SCALER_TPZ0_SCALE_SHIFT 8
+#define SCALER_TPZ0_IPHASE_MASK VC4_MASK(7, 0)
+#define SCALER_TPZ0_IPHASE_SHIFT 0
+#define SCALER_TPZ1_RECIP_MASK VC4_MASK(15, 0)
+#define SCALER_TPZ1_RECIP_SHIFT 0
+
+/* Skips interpolating coefficients to 64 phases, so just 8 are used.
+ * Required for nearest neighbor.
+ */
+#define SCALER_PPF_NOINTERP BIT(31)
+/* Replaces the highest valued coefficient with one that makes all 4
+ * sum to unity.
+ */
+#define SCALER_PPF_AGC BIT(30)
+#define SCALER_PPF_SCALE_MASK VC4_MASK(24, 8)
+#define SCALER_PPF_SCALE_SHIFT 8
+#define SCALER_PPF_IPHASE_MASK VC4_MASK(6, 0)
+#define SCALER_PPF_IPHASE_SHIFT 0
+
+#define SCALER_PPF_KERNEL_OFFSET_MASK VC4_MASK(13, 0)
+#define SCALER_PPF_KERNEL_OFFSET_SHIFT 0
+#define SCALER_PPF_KERNEL_UNCACHED BIT(31)
+
+/* PITCH0/1/2 fields for raster. */
+#define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0)
+#define SCALER_SRC_PITCH_SHIFT 0
+
+/* PITCH0/1/2 fields for tiled (SAND). */
+#define SCALER_TILE_SKIP_0_MASK VC4_MASK(18, 16)
+#define SCALER_TILE_SKIP_0_SHIFT 16
+#define SCALER_TILE_HEIGHT_MASK VC4_MASK(15, 0)
+#define SCALER_TILE_HEIGHT_SHIFT 0
+
+/* PITCH0 fields for T-tiled. */
+#define SCALER_PITCH0_TILE_WIDTH_L_MASK VC4_MASK(22, 16)
+#define SCALER_PITCH0_TILE_WIDTH_L_SHIFT 16
+#define SCALER_PITCH0_TILE_LINE_DIR BIT(15)
+#define SCALER_PITCH0_TILE_INITIAL_LINE_DIR BIT(14)
+/* Y offset within a tile. */
+#define SCALER_PITCH0_TILE_Y_OFFSET_MASK VC4_MASK(13, 7)
+#define SCALER_PITCH0_TILE_Y_OFFSET_SHIFT 7
+#define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0)
+#define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0
+
+#endif /* VC4_REGS_H */
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
new file mode 100644
index 000000000..273984f71
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -0,0 +1,661 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Render command list generation
+ *
+ * In the V3D hardware, render command lists are what load and store
+ * tiles of a framebuffer and optionally call out to binner-generated
+ * command lists to do the 3D drawing for that tile.
+ *
+ * In the VC4 driver, render command list generation is performed by the
+ * kernel instead of userspace. We do this because validating a
+ * user-submitted command list is hard to get right and has high CPU overhead,
+ * while the number of valid configurations for render command lists is
+ * actually fairly low.
+ */
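+
+/* For a single tile, the stream generated below (see emit_tile()) ends up
+ * looking roughly like:
+ *
+ * LOAD_TILE_BUFFER_GENERAL (optional color/Z-S read)
+ * TILE_COORDINATES x, y
+ * BRANCH_TO_SUB_LIST (binner output for this tile, if any)
+ * STORE_MS_TILE_BUFFER (or ..._AND_EOF on the last tile)
+ *
+ * This is only an illustrative summary of the common case, not an exhaustive
+ * list of the packets that can be emitted.
+ */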
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_packet.h"
+
+struct vc4_rcl_setup {
+ struct drm_gem_cma_object *color_read;
+ struct drm_gem_cma_object *color_write;
+ struct drm_gem_cma_object *zs_read;
+ struct drm_gem_cma_object *zs_write;
+ struct drm_gem_cma_object *msaa_color_write;
+ struct drm_gem_cma_object *msaa_zs_write;
+
+ struct drm_gem_cma_object *rcl;
+ u32 next_offset;
+
+ u32 next_write_bo_index;
+};
+
+static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
+{
+ *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;
+ setup->next_offset += 1;
+}
+
+static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)
+{
+ *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val;
+ setup->next_offset += 2;
+}
+
+static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
+{
+ *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val;
+ setup->next_offset += 4;
+}
+
+/*
+ * Emits a no-op STORE_TILE_BUFFER_GENERAL.
+ *
+ * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
+ * some sort before another load is triggered.
+ */
+static void vc4_store_before_load(struct vc4_rcl_setup *setup)
+{
+ rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+ rcl_u16(setup,
+ VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+ VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
+ VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
+ VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);
+ rcl_u32(setup, 0); /* no address, since we're in None mode */
+}
+
+/*
+ * Calculates the physical address of the start of a tile in a RCL surface.
+ *
+ * Unlike the other load/store packets,
+ * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile
+ * coordinates packet, and instead just store to the address given.
+ */
+static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec,
+ struct drm_gem_cma_object *bo,
+ struct drm_vc4_submit_rcl_surface *surf,
+ uint8_t x, uint8_t y)
+{
+ return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE *
+ (DIV_ROUND_UP(exec->args->width, 32) * y + x);
+}
+
+/*
+ * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
+ *
+ * The tile coordinates packet triggers a pending load if there is one, is
+ * used for clipping during rendering, and determines where loads/stores
+ * happen relative to their base address.
+ */
+static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,
+ uint32_t x, uint32_t y)
+{
+ rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);
+ rcl_u8(setup, x);
+ rcl_u8(setup, y);
+}
+
+static void emit_tile(struct vc4_exec_info *exec,
+ struct vc4_rcl_setup *setup,
+ uint8_t x, uint8_t y, bool first, bool last)
+{
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
+
+ /* Note that the load doesn't actually occur until the
+ * tile coords packet is processed, and only one load
+ * may be outstanding at a time.
+ */
+ if (setup->color_read) {
+ if (args->color_read.flags &
+ VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+ rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
+ rcl_u32(setup,
+ vc4_full_res_offset(exec, setup->color_read,
+ &args->color_read, x, y) |
+ VC4_LOADSTORE_FULL_RES_DISABLE_ZS);
+ } else {
+ rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+ rcl_u16(setup, args->color_read.bits);
+ rcl_u32(setup, setup->color_read->paddr +
+ args->color_read.offset);
+ }
+ }
+
+ if (setup->zs_read) {
+ if (args->zs_read.flags &
+ VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+ rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
+ rcl_u32(setup,
+ vc4_full_res_offset(exec, setup->zs_read,
+ &args->zs_read, x, y) |
+ VC4_LOADSTORE_FULL_RES_DISABLE_COLOR);
+ } else {
+ if (setup->color_read) {
+ /* Exec previous load. */
+ vc4_tile_coordinates(setup, x, y);
+ vc4_store_before_load(setup);
+ }
+
+ rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+ rcl_u16(setup, args->zs_read.bits);
+ rcl_u32(setup, setup->zs_read->paddr +
+ args->zs_read.offset);
+ }
+ }
+
+ /* Clipping depends on tile coordinates having been
+ * emitted, so we always need one here.
+ */
+ vc4_tile_coordinates(setup, x, y);
+
+ /* Wait for the binner before jumping to the first
+ * tile's lists.
+ */
+ if (first && has_bin)
+ rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
+ if (has_bin) {
+ rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
+ rcl_u32(setup, (exec->tile_alloc_offset +
+ (y * exec->bin_tiles_x + x) * 32));
+ }
+
+ if (setup->msaa_color_write) {
+ bool last_tile_write = (!setup->msaa_zs_write &&
+ !setup->zs_write &&
+ !setup->color_write);
+ uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS;
+
+ if (!last_tile_write)
+ bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
+ else if (last)
+ bits |= VC4_LOADSTORE_FULL_RES_EOF;
+ rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
+ rcl_u32(setup,
+ vc4_full_res_offset(exec, setup->msaa_color_write,
+ &args->msaa_color_write, x, y) |
+ bits);
+ }
+
+ if (setup->msaa_zs_write) {
+ bool last_tile_write = (!setup->zs_write &&
+ !setup->color_write);
+ uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR;
+
+ if (setup->msaa_color_write)
+ vc4_tile_coordinates(setup, x, y);
+ if (!last_tile_write)
+ bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
+ else if (last)
+ bits |= VC4_LOADSTORE_FULL_RES_EOF;
+ rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
+ rcl_u32(setup,
+ vc4_full_res_offset(exec, setup->msaa_zs_write,
+ &args->msaa_zs_write, x, y) |
+ bits);
+ }
+
+ if (setup->zs_write) {
+ bool last_tile_write = !setup->color_write;
+
+ if (setup->msaa_color_write || setup->msaa_zs_write)
+ vc4_tile_coordinates(setup, x, y);
+
+ rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+ rcl_u16(setup, args->zs_write.bits |
+ (last_tile_write ?
+ 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR));
+ rcl_u32(setup,
+ (setup->zs_write->paddr + args->zs_write.offset) |
+ ((last && last_tile_write) ?
+ VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+ }
+
+ if (setup->color_write) {
+ if (setup->msaa_color_write || setup->msaa_zs_write ||
+ setup->zs_write) {
+ vc4_tile_coordinates(setup, x, y);
+ }
+
+ if (last)
+ rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
+ else
+ rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);
+ }
+}
+
+static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
+ struct vc4_rcl_setup *setup)
+{
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
+ uint8_t min_x_tile = args->min_x_tile;
+ uint8_t min_y_tile = args->min_y_tile;
+ uint8_t max_x_tile = args->max_x_tile;
+ uint8_t max_y_tile = args->max_y_tile;
+ uint8_t xtiles = max_x_tile - min_x_tile + 1;
+ uint8_t ytiles = max_y_tile - min_y_tile + 1;
+ uint8_t xi, yi;
+ uint32_t size, loop_body_size;
+ bool positive_x = true;
+ bool positive_y = true;
+
+ if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) {
+ if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X))
+ positive_x = false;
+ if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y))
+ positive_y = false;
+ }
+
+ size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
+ loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
+
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ size += VC4_PACKET_CLEAR_COLORS_SIZE +
+ VC4_PACKET_TILE_COORDINATES_SIZE +
+ VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+ }
+
+ if (setup->color_read) {
+ if (args->color_read.flags &
+ VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+ loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
+ } else {
+ loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+ }
+ }
+ if (setup->zs_read) {
+ if (args->zs_read.flags &
+ VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+ loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
+ } else {
+ if (setup->color_read &&
+ !(args->color_read.flags &
+ VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
+ loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
+ loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+ }
+ loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+ }
+ }
+
+ if (has_bin) {
+ size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
+ loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
+ }
+
+ if (setup->msaa_color_write)
+ loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
+ if (setup->msaa_zs_write)
+ loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
+
+ if (setup->zs_write)
+ loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+ if (setup->color_write)
+ loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
+
+ /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
+ loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *
+ ((setup->msaa_color_write != NULL) +
+ (setup->msaa_zs_write != NULL) +
+ (setup->color_write != NULL) +
+ (setup->zs_write != NULL) - 1);
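+ /* For example, with both color_write and zs_write set, this budgets one
+ * extra TILE_COORDINATES per tile, matching the re-emission done in
+ * emit_tile() before the second store.
+ */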
+
+ size += xtiles * ytiles * loop_body_size;
+
+ setup->rcl = &vc4_bo_create(dev, size, true, VC4_BO_TYPE_RCL)->base;
+ if (IS_ERR(setup->rcl))
+ return PTR_ERR(setup->rcl);
+ list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
+ &exec->unref_list);
+
+ /* The tile buffer gets cleared when the previous tile is stored. If
+ * the clear values changed between frames, then the tile buffer has
+ * stale clear values in it, so we have to do a store in None mode (no
+ * writes) so that we trigger the tile buffer clear.
+ */
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
+ rcl_u32(setup, args->clear_color[0]);
+ rcl_u32(setup, args->clear_color[1]);
+ rcl_u32(setup, args->clear_z);
+ rcl_u8(setup, args->clear_s);
+
+ vc4_tile_coordinates(setup, 0, 0);
+
+ rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+ rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
+ rcl_u32(setup, 0); /* no address, since we're in None mode */
+ }
+
+ rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
+ rcl_u32(setup,
+ (setup->color_write ? (setup->color_write->paddr +
+ args->color_write.offset) :
+ 0));
+ rcl_u16(setup, args->width);
+ rcl_u16(setup, args->height);
+ rcl_u16(setup, args->color_write.bits);
+
+ for (yi = 0; yi < ytiles; yi++) {
+ int y = positive_y ? min_y_tile + yi : max_y_tile - yi;
+ for (xi = 0; xi < xtiles; xi++) {
+ int x = positive_x ? min_x_tile + xi : max_x_tile - xi;
+ bool first = (xi == 0 && yi == 0);
+ bool last = (xi == xtiles - 1 && yi == ytiles - 1);
+
+ emit_tile(exec, setup, x, y, first, last);
+ }
+ }
+
+ BUG_ON(setup->next_offset != size);
+ exec->ct1ca = setup->rcl->paddr;
+ exec->ct1ea = setup->rcl->paddr + setup->next_offset;
+
+ return 0;
+}
+
+static int vc4_full_res_bounds_check(struct vc4_exec_info *exec,
+ struct drm_gem_cma_object *obj,
+ struct drm_vc4_submit_rcl_surface *surf)
+{
+ struct drm_vc4_submit_cl *args = exec->args;
+ u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32);
+
+ if (surf->offset > obj->base.size) {
+ DRM_DEBUG("surface offset %d > BO size %zd\n",
+ surf->offset, obj->base.size);
+ return -EINVAL;
+ }
+
+ if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE <
+ render_tiles_stride * args->max_y_tile + args->max_x_tile) {
+ DRM_DEBUG("MSAA tile %d, %d out of bounds "
+ "(bo size %zd, offset %d).\n",
+ args->max_x_tile, args->max_y_tile,
+ obj->base.size,
+ surf->offset);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
+ struct drm_gem_cma_object **obj,
+ struct drm_vc4_submit_rcl_surface *surf)
+{
+ if (surf->flags != 0 || surf->bits != 0) {
+ DRM_DEBUG("MSAA surface had nonzero flags/bits\n");
+ return -EINVAL;
+ }
+
+ if (surf->hindex == ~0)
+ return 0;
+
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
+ return -EINVAL;
+
+ exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
+
+ if (surf->offset & 0xf) {
+ DRM_DEBUG("MSAA write must be 16b aligned.\n");
+ return -EINVAL;
+ }
+
+ return vc4_full_res_bounds_check(exec, *obj, surf);
+}
+
+static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
+ struct drm_gem_cma_object **obj,
+ struct drm_vc4_submit_rcl_surface *surf,
+ bool is_write)
+{
+ uint8_t tiling = VC4_GET_FIELD(surf->bits,
+ VC4_LOADSTORE_TILE_BUFFER_TILING);
+ uint8_t buffer = VC4_GET_FIELD(surf->bits,
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+ uint8_t format = VC4_GET_FIELD(surf->bits,
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+ int cpp;
+ int ret;
+
+ if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+ DRM_DEBUG("Extra flags set\n");
+ return -EINVAL;
+ }
+
+ if (surf->hindex == ~0)
+ return 0;
+
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
+ return -EINVAL;
+
+ if (is_write)
+ exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
+
+ if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+ if (surf == &exec->args->zs_write) {
+ DRM_DEBUG("general zs write may not be a full-res.\n");
+ return -EINVAL;
+ }
+
+ if (surf->bits != 0) {
+ DRM_DEBUG("load/store general bits set with "
+ "full res load/store.\n");
+ return -EINVAL;
+ }
+
+ ret = vc4_full_res_bounds_check(exec, *obj, surf);
+ if (ret)
+ return ret;
+
+ return 0;
+ }
+
+ if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
+ VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
+ DRM_DEBUG("Unknown bits in load/store: 0x%04x\n",
+ surf->bits);
+ return -EINVAL;
+ }
+
+ if (tiling > VC4_TILING_FORMAT_LT) {
+ DRM_DEBUG("Bad tiling format\n");
+ return -EINVAL;
+ }
+
+ if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
+ if (format != 0) {
+ DRM_DEBUG("No color format should be set for ZS\n");
+ return -EINVAL;
+ }
+ cpp = 4;
+ } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {
+ switch (format) {
+ case VC4_LOADSTORE_TILE_BUFFER_BGR565:
+ case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:
+ cpp = 2;
+ break;
+ case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:
+ cpp = 4;
+ break;
+ default:
+ DRM_DEBUG("Bad tile buffer format\n");
+ return -EINVAL;
+ }
+ } else {
+ DRM_DEBUG("Bad load/store buffer %d.\n", buffer);
+ return -EINVAL;
+ }
+
+ if (surf->offset & 0xf) {
+ DRM_DEBUG("load/store buffer must be 16b aligned.\n");
+ return -EINVAL;
+ }
+
+ if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+ exec->args->width, exec->args->height, cpp)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
+ struct vc4_rcl_setup *setup,
+ struct drm_gem_cma_object **obj,
+ struct drm_vc4_submit_rcl_surface *surf)
+{
+ uint8_t tiling = VC4_GET_FIELD(surf->bits,
+ VC4_RENDER_CONFIG_MEMORY_FORMAT);
+ uint8_t format = VC4_GET_FIELD(surf->bits,
+ VC4_RENDER_CONFIG_FORMAT);
+ int cpp;
+
+ if (surf->flags != 0) {
+ DRM_DEBUG("No flags supported on render config.\n");
+ return -EINVAL;
+ }
+
+ if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
+ VC4_RENDER_CONFIG_FORMAT_MASK |
+ VC4_RENDER_CONFIG_MS_MODE_4X |
+ VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) {
+ DRM_DEBUG("Unknown bits in render config: 0x%04x\n",
+ surf->bits);
+ return -EINVAL;
+ }
+
+ if (surf->hindex == ~0)
+ return 0;
+
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
+ return -EINVAL;
+
+ exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
+
+ if (tiling > VC4_TILING_FORMAT_LT) {
+ DRM_DEBUG("Bad tiling format\n");
+ return -EINVAL;
+ }
+
+ switch (format) {
+ case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
+ case VC4_RENDER_CONFIG_FORMAT_BGR565:
+ cpp = 2;
+ break;
+ case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
+ cpp = 4;
+ break;
+ default:
+ DRM_DEBUG("Bad tile buffer format\n");
+ return -EINVAL;
+ }
+
+ if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+ exec->args->width, exec->args->height, cpp)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+ struct vc4_rcl_setup setup = {0};
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
+ int ret;
+
+ if (args->min_x_tile > args->max_x_tile ||
+ args->min_y_tile > args->max_y_tile) {
+ DRM_DEBUG("Bad render tile set (%d,%d)-(%d,%d)\n",
+ args->min_x_tile, args->min_y_tile,
+ args->max_x_tile, args->max_y_tile);
+ return -EINVAL;
+ }
+
+ if (has_bin &&
+ (args->max_x_tile > exec->bin_tiles_x ||
+ args->max_y_tile > exec->bin_tiles_y)) {
+ DRM_DEBUG("Render tiles (%d,%d) outside of bin config "
+ "(%d,%d)\n",
+ args->max_x_tile, args->max_y_tile,
+ exec->bin_tiles_x, exec->bin_tiles_y);
+ return -EINVAL;
+ }
+
+ ret = vc4_rcl_render_config_surface_setup(exec, &setup,
+ &setup.color_write,
+ &args->color_write);
+ if (ret)
+ return ret;
+
+ ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read,
+ false);
+ if (ret)
+ return ret;
+
+ ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read,
+ false);
+ if (ret)
+ return ret;
+
+ ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write,
+ true);
+ if (ret)
+ return ret;
+
+ ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write,
+ &args->msaa_color_write);
+ if (ret)
+ return ret;
+
+ ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write,
+ &args->msaa_zs_write);
+ if (ret)
+ return ret;
+
+ /* We shouldn't even have the job submitted to us if there's no
+ * surface to write out.
+ */
+ if (!setup.color_write && !setup.zs_write &&
+ !setup.msaa_color_write && !setup.msaa_zs_write) {
+ DRM_DEBUG("RCL requires color or Z/S write\n");
+ return -EINVAL;
+ }
+
+ return vc4_create_rcl_bo(dev, exec, &setup);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h
new file mode 100644
index 000000000..deafb3292
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _VC4_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vc4
+#define TRACE_INCLUDE_FILE vc4_trace
+
+TRACE_EVENT(vc4_wait_for_seqno_begin,
+ TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
+ TP_ARGS(dev, seqno, timeout),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u64, seqno)
+ __field(u64, timeout)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->seqno = seqno;
+ __entry->timeout = timeout;
+ ),
+
+ TP_printk("dev=%u, seqno=%llu, timeout=%llu",
+ __entry->dev, __entry->seqno, __entry->timeout)
+);
+
+TRACE_EVENT(vc4_wait_for_seqno_end,
+ TP_PROTO(struct drm_device *dev, uint64_t seqno),
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%u, seqno=%llu",
+ __entry->dev, __entry->seqno)
+);
+
+#endif /* _VC4_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/vc4
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/vc4/vc4_trace_points.c b/drivers/gpu/drm/vc4/vc4_trace_points.c
new file mode 100644
index 000000000..e6278f257
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace_points.c
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "vc4_drv.h"
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "vc4_trace.h"
+#endif
diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c
new file mode 100644
index 000000000..3b56558a5
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_txp.c
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2018 Broadcom
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_writeback.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/of_graph.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+/* Base address of the output. Raster formats must be 4-byte aligned,
+ * T and LT must be 16-byte aligned or maybe utile-aligned (docs are
+ * inconsistent, but probably utile).
+ */
+#define TXP_DST_PTR 0x00
+
+/* Pitch in bytes for raster images, 16-byte aligned. For tiled, it's
+ * the width in tiles.
+ */
+#define TXP_DST_PITCH 0x04
+/* For T-tiled images, DST_PITCH should be the number of tiles wide,
+ * shifted up.
+ */
+# define TXP_T_TILE_WIDTH_SHIFT 7
+/* For LT-tiled images, DST_PITCH should be the number of utiles wide,
+ * shifted up.
+ */
+# define TXP_LT_TILE_WIDTH_SHIFT 4
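+/* Worked example (an illustration, assuming the usual VC4 T-format
+ * geometry where a 4KB tile covers 32x32 pixels at 32bpp): a
+ * 1920-pixel-wide RGBA8888 T-tiled destination is 1920 / 32 = 60 tiles
+ * wide, so DST_PITCH would be 60 << TXP_T_TILE_WIDTH_SHIFT.
+ */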
+
+/* Pre-rotation width/height of the image. Must match HVS config.
+ *
+ * If TFORMAT is set, the width limit is 1920 pixels for 32-bit
+ * formats and 3840 for 16-bit formats, and width/height must be tile-
+ * or utile-aligned as appropriate. If transposing (rotating), width
+ * is limited to 1920.
+ *
+ * Height is limited to various numbers between 4088 and 4095. I'd
+ * just use 4088 to be safe.
+ */
+#define TXP_DIM 0x08
+# define TXP_HEIGHT_SHIFT 16
+# define TXP_HEIGHT_MASK GENMASK(31, 16)
+# define TXP_WIDTH_SHIFT 0
+# define TXP_WIDTH_MASK GENMASK(15, 0)
+
+#define TXP_DST_CTRL 0x0c
+/* These bits are set to 0x54 */
+#define TXP_PILOT_SHIFT 24
+#define TXP_PILOT_MASK GENMASK(31, 24)
+/* Bits 22-23 are set to 0x01 */
+#define TXP_VERSION_SHIFT 22
+#define TXP_VERSION_MASK GENMASK(23, 22)
+
+/* Powers down the internal memory. */
+# define TXP_POWERDOWN BIT(21)
+
+/* Enables storing the alpha component in 8888/4444, instead of
+ * filling with ~ALPHA_INVERT.
+ */
+# define TXP_ALPHA_ENABLE BIT(20)
+
+/* 4 bits, each enables stores for a channel in each set of 4 bytes.
+ * Set to 0xf for normal operation.
+ */
+# define TXP_BYTE_ENABLE_SHIFT 16
+# define TXP_BYTE_ENABLE_MASK GENMASK(19, 16)
+
+/* Debug: Generate VSTART again at EOF. */
+# define TXP_VSTART_AT_EOF BIT(15)
+
+/* Debug: Terminate the current frame immediately. Stops AXI
+ * writes.
+ */
+# define TXP_ABORT BIT(14)
+
+# define TXP_DITHER BIT(13)
+
+/* Inverts alpha if TXP_ALPHA_ENABLE, chooses fill value for
+ * !TXP_ALPHA_ENABLE.
+ */
+# define TXP_ALPHA_INVERT BIT(12)
+
+/* Note: I've listed the channels here in high bit (in byte 3/2/1) to
+ * low bit (in byte 0) order.
+ */
+# define TXP_FORMAT_SHIFT 8
+# define TXP_FORMAT_MASK GENMASK(11, 8)
+# define TXP_FORMAT_ABGR4444 0
+# define TXP_FORMAT_ARGB4444 1
+# define TXP_FORMAT_BGRA4444 2
+# define TXP_FORMAT_RGBA4444 3
+# define TXP_FORMAT_BGR565 6
+# define TXP_FORMAT_RGB565 7
+/* 888s are non-rotated, raster-only */
+# define TXP_FORMAT_BGR888 8
+# define TXP_FORMAT_RGB888 9
+# define TXP_FORMAT_ABGR8888 12
+# define TXP_FORMAT_ARGB8888 13
+# define TXP_FORMAT_BGRA8888 14
+# define TXP_FORMAT_RGBA8888 15
+
+/* If TFORMAT is set, generates LT instead of T format. */
+# define TXP_LINEAR_UTILE BIT(7)
+
+/* Rotate output by 90 degrees. */
+# define TXP_TRANSPOSE BIT(6)
+
+/* Generate a tiled format for V3D. */
+# define TXP_TFORMAT BIT(5)
+
+/* Generates some undefined test mode output. */
+# define TXP_TEST_MODE BIT(4)
+
+/* Request odd field from HVS. */
+# define TXP_FIELD BIT(3)
+
+/* Raise interrupt when idle. */
+# define TXP_EI BIT(2)
+
+/* Set when generating a frame, clears when idle. */
+# define TXP_BUSY BIT(1)
+
+/* Starts a frame. Self-clearing. */
+# define TXP_GO BIT(0)
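+
+/* The commit path below programs TXP_DST_PTR, TXP_DST_PITCH and
+ * TXP_DIM first, then writes TXP_DST_CTRL with TXP_GO | TXP_EI plus
+ * the format and byte-enable fields to kick off the frame.
+ */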
+
+/* Number of lines received and committed to memory. */
+#define TXP_PROGRESS 0x10
+
+#define TXP_READ(offset) readl(txp->regs + (offset))
+#define TXP_WRITE(offset, val) writel(val, txp->regs + (offset))
+
+struct vc4_txp {
+ struct platform_device *pdev;
+
+ struct drm_writeback_connector connector;
+
+ void __iomem *regs;
+};
+
+static inline struct vc4_txp *encoder_to_vc4_txp(struct drm_encoder *encoder)
+{
+ return container_of(encoder, struct vc4_txp, connector.encoder);
+}
+
+static inline struct vc4_txp *connector_to_vc4_txp(struct drm_connector *conn)
+{
+ return container_of(conn, struct vc4_txp, connector.base);
+}
+
+#define TXP_REG(reg) { reg, #reg }
+static const struct {
+ u32 reg;
+ const char *name;
+} txp_regs[] = {
+ TXP_REG(TXP_DST_PTR),
+ TXP_REG(TXP_DST_PITCH),
+ TXP_REG(TXP_DIM),
+ TXP_REG(TXP_DST_CTRL),
+ TXP_REG(TXP_PROGRESS),
+};
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_txp_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_txp *txp = vc4->txp;
+ int i;
+
+ if (!txp)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(txp_regs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ txp_regs[i].name, txp_regs[i].reg,
+ TXP_READ(txp_regs[i].reg));
+ }
+
+ return 0;
+}
+#endif
+
+static int vc4_txp_connector_get_modes(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+
+ return drm_add_modes_noedid(connector, dev->mode_config.max_width,
+ dev->mode_config.max_height);
+}
+
+static enum drm_mode_status
+vc4_txp_connector_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = connector->dev;
+ struct drm_mode_config *mode_config = &dev->mode_config;
+ int w = mode->hdisplay, h = mode->vdisplay;
+
+ if (w < mode_config->min_width || w > mode_config->max_width)
+ return MODE_BAD_HVALUE;
+
+ if (h < mode_config->min_height || h > mode_config->max_height)
+ return MODE_BAD_VVALUE;
+
+ return MODE_OK;
+}
+
+static const u32 drm_fmts[] = {
+ DRM_FORMAT_RGB888,
+ DRM_FORMAT_BGR888,
+ DRM_FORMAT_XRGB8888,
+ DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_ABGR8888,
+ DRM_FORMAT_RGBX8888,
+ DRM_FORMAT_BGRX8888,
+ DRM_FORMAT_RGBA8888,
+ DRM_FORMAT_BGRA8888,
+};
+
+static const u32 txp_fmts[] = {
+ TXP_FORMAT_RGB888,
+ TXP_FORMAT_BGR888,
+ TXP_FORMAT_ARGB8888,
+ TXP_FORMAT_ABGR8888,
+ TXP_FORMAT_ARGB8888,
+ TXP_FORMAT_ABGR8888,
+ TXP_FORMAT_RGBA8888,
+ TXP_FORMAT_BGRA8888,
+ TXP_FORMAT_RGBA8888,
+ TXP_FORMAT_BGRA8888,
+};
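+
+/* txp_fmts[] is index-matched to drm_fmts[] above.  The DRM "X"
+ * (padding) formats reuse the corresponding alpha TXP format; the
+ * padding byte is then filled via TXP_ALPHA_ENABLE/TXP_ALPHA_INVERT
+ * in vc4_txp_connector_atomic_commit() below.
+ */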
+
+static int vc4_txp_connector_atomic_check(struct drm_connector *conn,
+ struct drm_connector_state *conn_state)
+{
+ struct drm_crtc_state *crtc_state;
+ struct drm_gem_cma_object *gem;
+ struct drm_framebuffer *fb;
+ int i;
+
+ if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+ return 0;
+
+ crtc_state = drm_atomic_get_new_crtc_state(conn_state->state,
+ conn_state->crtc);
+
+ fb = conn_state->writeback_job->fb;
+ if (fb->width != crtc_state->mode.hdisplay ||
+ fb->height != crtc_state->mode.vdisplay) {
+ DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n",
+ fb->width, fb->height);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(drm_fmts); i++) {
+ if (fb->format->format == drm_fmts[i])
+ break;
+ }
+
+ if (i == ARRAY_SIZE(drm_fmts))
+ return -EINVAL;
+
+ gem = drm_fb_cma_get_gem_obj(fb, 0);
+
+	/* Pitch must be aligned to 16 bytes. */
+ if (fb->pitches[0] & GENMASK(3, 0))
+ return -EINVAL;
+
+ vc4_crtc_txp_armed(crtc_state);
+
+ return 0;
+}
+
+static void vc4_txp_connector_atomic_commit(struct drm_connector *conn,
+ struct drm_connector_state *conn_state)
+{
+ struct vc4_txp *txp = connector_to_vc4_txp(conn);
+ struct drm_gem_cma_object *gem;
+ struct drm_display_mode *mode;
+ struct drm_framebuffer *fb;
+ u32 ctrl;
+ int i;
+
+ if (WARN_ON(!conn_state->writeback_job ||
+ !conn_state->writeback_job->fb))
+ return;
+
+ mode = &conn_state->crtc->state->adjusted_mode;
+ fb = conn_state->writeback_job->fb;
+
+ for (i = 0; i < ARRAY_SIZE(drm_fmts); i++) {
+ if (fb->format->format == drm_fmts[i])
+ break;
+ }
+
+ if (WARN_ON(i == ARRAY_SIZE(drm_fmts)))
+ return;
+
+ ctrl = TXP_GO | TXP_EI |
+ VC4_SET_FIELD(0xf, TXP_BYTE_ENABLE) |
+ VC4_SET_FIELD(txp_fmts[i], TXP_FORMAT);
+
+ if (fb->format->has_alpha)
+ ctrl |= TXP_ALPHA_ENABLE;
+ else
+ /*
+ * If TXP_ALPHA_ENABLE isn't set and TXP_ALPHA_INVERT is, the
+ * hardware will force the output padding to be 0xff.
+ */
+ ctrl |= TXP_ALPHA_INVERT;
+
+ gem = drm_fb_cma_get_gem_obj(fb, 0);
+ TXP_WRITE(TXP_DST_PTR, gem->paddr + fb->offsets[0]);
+ TXP_WRITE(TXP_DST_PITCH, fb->pitches[0]);
+ TXP_WRITE(TXP_DIM,
+ VC4_SET_FIELD(mode->hdisplay, TXP_WIDTH) |
+ VC4_SET_FIELD(mode->vdisplay, TXP_HEIGHT));
+
+ TXP_WRITE(TXP_DST_CTRL, ctrl);
+
+ drm_writeback_queue_job(&txp->connector, conn_state->writeback_job);
+}
+
+static const struct drm_connector_helper_funcs vc4_txp_connector_helper_funcs = {
+ .get_modes = vc4_txp_connector_get_modes,
+ .mode_valid = vc4_txp_connector_mode_valid,
+ .atomic_check = vc4_txp_connector_atomic_check,
+ .atomic_commit = vc4_txp_connector_atomic_commit,
+};
+
+static enum drm_connector_status
+vc4_txp_connector_detect(struct drm_connector *connector, bool force)
+{
+ return connector_status_connected;
+}
+
+static void vc4_txp_connector_destroy(struct drm_connector *connector)
+{
+ drm_connector_unregister(connector);
+ drm_connector_cleanup(connector);
+}
+
+static const struct drm_connector_funcs vc4_txp_connector_funcs = {
+ .detect = vc4_txp_connector_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = vc4_txp_connector_destroy,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static void vc4_txp_encoder_disable(struct drm_encoder *encoder)
+{
+ struct vc4_txp *txp = encoder_to_vc4_txp(encoder);
+
+ if (TXP_READ(TXP_DST_CTRL) & TXP_BUSY) {
+ unsigned long timeout = jiffies + msecs_to_jiffies(1000);
+
+ TXP_WRITE(TXP_DST_CTRL, TXP_ABORT);
+
+ while (TXP_READ(TXP_DST_CTRL) & TXP_BUSY &&
+ time_before(jiffies, timeout))
+ ;
+
+ WARN_ON(TXP_READ(TXP_DST_CTRL) & TXP_BUSY);
+ }
+
+ TXP_WRITE(TXP_DST_CTRL, TXP_POWERDOWN);
+}
+
+static const struct drm_encoder_helper_funcs vc4_txp_encoder_helper_funcs = {
+ .disable = vc4_txp_encoder_disable,
+};
+
+static irqreturn_t vc4_txp_interrupt(int irq, void *data)
+{
+ struct vc4_txp *txp = data;
+
+ TXP_WRITE(TXP_DST_CTRL, TXP_READ(TXP_DST_CTRL) & ~TXP_EI);
+ vc4_crtc_handle_vblank(to_vc4_crtc(txp->connector.base.state->crtc));
+ drm_writeback_signal_completion(&txp->connector, 0);
+
+ return IRQ_HANDLED;
+}
+
+static int vc4_txp_bind(struct device *dev, struct device *master, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_txp *txp;
+ int ret, irq;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ txp = devm_kzalloc(dev, sizeof(*txp), GFP_KERNEL);
+ if (!txp)
+ return -ENOMEM;
+
+ txp->pdev = pdev;
+
+ txp->regs = vc4_ioremap_regs(pdev, 0);
+ if (IS_ERR(txp->regs))
+ return PTR_ERR(txp->regs);
+
+ drm_connector_helper_add(&txp->connector.base,
+ &vc4_txp_connector_helper_funcs);
+ ret = drm_writeback_connector_init(drm, &txp->connector,
+ &vc4_txp_connector_funcs,
+ &vc4_txp_encoder_helper_funcs,
+ drm_fmts, ARRAY_SIZE(drm_fmts));
+ if (ret)
+ return ret;
+
+ ret = devm_request_irq(dev, irq, vc4_txp_interrupt, 0,
+ dev_name(dev), txp);
+ if (ret)
+ return ret;
+
+ dev_set_drvdata(dev, txp);
+ vc4->txp = txp;
+
+ return 0;
+}
+
+static void vc4_txp_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_txp *txp = dev_get_drvdata(dev);
+
+ vc4_txp_connector_destroy(&txp->connector.base);
+
+ vc4->txp = NULL;
+}
+
+static const struct component_ops vc4_txp_ops = {
+ .bind = vc4_txp_bind,
+ .unbind = vc4_txp_unbind,
+};
+
+static int vc4_txp_probe(struct platform_device *pdev)
+{
+ return component_add(&pdev->dev, &vc4_txp_ops);
+}
+
+static int vc4_txp_remove(struct platform_device *pdev)
+{
+ component_del(&pdev->dev, &vc4_txp_ops);
+ return 0;
+}
+
+static const struct of_device_id vc4_txp_dt_match[] = {
+ { .compatible = "brcm,bcm2835-txp" },
+ { /* sentinel */ },
+};
+
+struct platform_driver vc4_txp_driver = {
+ .probe = vc4_txp_probe,
+ .remove = vc4_txp_remove,
+ .driver = {
+ .name = "vc4_txp",
+ .of_match_table = vc4_txp_dt_match,
+ },
+};
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
new file mode 100644
index 000000000..e47e29426
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright (c) 2014 The Linux Foundation. All rights reserved.
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/pm_runtime.h>
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#ifdef CONFIG_DEBUG_FS
+#define REGDEF(reg) { reg, #reg }
+static const struct {
+ uint32_t reg;
+ const char *name;
+} vc4_reg_defs[] = {
+ REGDEF(V3D_IDENT0),
+ REGDEF(V3D_IDENT1),
+ REGDEF(V3D_IDENT2),
+ REGDEF(V3D_SCRATCH),
+ REGDEF(V3D_L2CACTL),
+ REGDEF(V3D_SLCACTL),
+ REGDEF(V3D_INTCTL),
+ REGDEF(V3D_INTENA),
+ REGDEF(V3D_INTDIS),
+ REGDEF(V3D_CT0CS),
+ REGDEF(V3D_CT1CS),
+ REGDEF(V3D_CT0EA),
+ REGDEF(V3D_CT1EA),
+ REGDEF(V3D_CT0CA),
+ REGDEF(V3D_CT1CA),
+ REGDEF(V3D_CT00RA0),
+ REGDEF(V3D_CT01RA0),
+ REGDEF(V3D_CT0LC),
+ REGDEF(V3D_CT1LC),
+ REGDEF(V3D_CT0PC),
+ REGDEF(V3D_CT1PC),
+ REGDEF(V3D_PCS),
+ REGDEF(V3D_BFC),
+ REGDEF(V3D_RFC),
+ REGDEF(V3D_BPCA),
+ REGDEF(V3D_BPCS),
+ REGDEF(V3D_BPOA),
+ REGDEF(V3D_BPOS),
+ REGDEF(V3D_BXCF),
+ REGDEF(V3D_SQRSV0),
+ REGDEF(V3D_SQRSV1),
+ REGDEF(V3D_SQCNTL),
+ REGDEF(V3D_SRQPC),
+ REGDEF(V3D_SRQUA),
+ REGDEF(V3D_SRQUL),
+ REGDEF(V3D_SRQCS),
+ REGDEF(V3D_VPACNTL),
+ REGDEF(V3D_VPMBASE),
+ REGDEF(V3D_PCTRC),
+ REGDEF(V3D_PCTRE),
+ REGDEF(V3D_PCTR(0)),
+ REGDEF(V3D_PCTRS(0)),
+ REGDEF(V3D_PCTR(1)),
+ REGDEF(V3D_PCTRS(1)),
+ REGDEF(V3D_PCTR(2)),
+ REGDEF(V3D_PCTRS(2)),
+ REGDEF(V3D_PCTR(3)),
+ REGDEF(V3D_PCTRS(3)),
+ REGDEF(V3D_PCTR(4)),
+ REGDEF(V3D_PCTRS(4)),
+ REGDEF(V3D_PCTR(5)),
+ REGDEF(V3D_PCTRS(5)),
+ REGDEF(V3D_PCTR(6)),
+ REGDEF(V3D_PCTRS(6)),
+ REGDEF(V3D_PCTR(7)),
+ REGDEF(V3D_PCTRS(7)),
+ REGDEF(V3D_PCTR(8)),
+ REGDEF(V3D_PCTRS(8)),
+ REGDEF(V3D_PCTR(9)),
+ REGDEF(V3D_PCTRS(9)),
+ REGDEF(V3D_PCTR(10)),
+ REGDEF(V3D_PCTRS(10)),
+ REGDEF(V3D_PCTR(11)),
+ REGDEF(V3D_PCTRS(11)),
+ REGDEF(V3D_PCTR(12)),
+ REGDEF(V3D_PCTRS(12)),
+ REGDEF(V3D_PCTR(13)),
+ REGDEF(V3D_PCTRS(13)),
+ REGDEF(V3D_PCTR(14)),
+ REGDEF(V3D_PCTRS(14)),
+ REGDEF(V3D_PCTR(15)),
+ REGDEF(V3D_PCTRS(15)),
+ REGDEF(V3D_DBGE),
+ REGDEF(V3D_FDBGO),
+ REGDEF(V3D_FDBGB),
+ REGDEF(V3D_FDBGR),
+ REGDEF(V3D_FDBGS),
+ REGDEF(V3D_ERRSTAT),
+};
+
+int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vc4_reg_defs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ vc4_reg_defs[i].name, vc4_reg_defs[i].reg,
+ V3D_READ(vc4_reg_defs[i].reg));
+ }
+
+ return 0;
+}
+
+int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ uint32_t ident1 = V3D_READ(V3D_IDENT1);
+ uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC);
+ uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
+ uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
+
+ seq_printf(m, "Revision: %d\n",
+ VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
+ seq_printf(m, "Slices: %d\n", nslc);
+ seq_printf(m, "TMUs: %d\n", nslc * tups);
+ seq_printf(m, "QPUs: %d\n", nslc * qups);
+ seq_printf(m, "Semaphores: %d\n",
+ VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+
+ return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static void vc4_v3d_init_hw(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ /* Take all the memory that would have been reserved for user
+ * QPU programs, since we don't have an interface for running
+ * them, anyway.
+ */
+ V3D_WRITE(V3D_VPMBASE, 0);
+}
+
+int vc4_v3d_get_bin_slot(struct vc4_dev *vc4)
+{
+ struct drm_device *dev = vc4->dev;
+ unsigned long irqflags;
+ int slot;
+ uint64_t seqno = 0;
+ struct vc4_exec_info *exec;
+
+try_again:
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ slot = ffs(~vc4->bin_alloc_used);
+ if (slot != 0) {
+ /* Switch from ffs() bit index to a 0-based index. */
+ slot--;
+ vc4->bin_alloc_used |= BIT(slot);
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ return slot;
+ }
+
+ /* Couldn't find an open slot. Wait for render to complete
+ * and try again.
+ */
+ exec = vc4_last_render_job(vc4);
+ if (exec)
+ seqno = exec->seqno;
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+ if (seqno) {
+ int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true);
+
+ if (ret == 0)
+ goto try_again;
+
+ return ret;
+ }
+
+ return -ENOMEM;
+}
+
+/**
+ * vc4_allocate_bin_bo() - allocates the memory that will be used for
+ * tile binning.
+ *
+ * The binner has a limitation that the addresses in the tile state
+ * buffer that point into the tile alloc buffer or binner overflow
+ * memory only have 28 bits (256MB), and the top 4 on the bus for
+ * tile alloc references end up coming from the tile state buffer's
+ * address.
+ *
+ * To work around this, we allocate a single large buffer while V3D is
+ * in use, make sure that it has the top 4 bits constant across its
+ * entire extent, and then put the tile state, tile alloc, and binner
+ * overflow memory inside that buffer.
+ *
+ * This creates a limitation where we may not be able to execute a job
+ * if it doesn't fit within the buffer that we allocated up front.
+ * However, it turns out that 16MB is "enough for anybody", and
+ * real-world applications run into allocation failures from the
+ * overall CMA pool before they make scenes complicated enough to run
+ * out of bin space.
+ */
+static int vc4_allocate_bin_bo(struct drm_device *drm)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_v3d *v3d = vc4->v3d;
+ uint32_t size = 16 * 1024 * 1024;
+ int ret = 0;
+ struct list_head list;
+
+ /* We may need to try allocating more than once to get a BO
+ * that doesn't cross 256MB. Track the ones we've allocated
+ * that failed so far, so that we can free them when we've got
+ * one that succeeded (if we freed them right away, our next
+ * allocation would probably be the same chunk of memory).
+ */
+ INIT_LIST_HEAD(&list);
+
+ while (true) {
+ struct vc4_bo *bo = vc4_bo_create(drm, size, true,
+ VC4_BO_TYPE_BIN);
+
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+
+ dev_err(&v3d->pdev->dev,
+ "Failed to allocate memory for tile binning: "
+ "%d. You may need to enable CMA or give it "
+ "more memory.",
+ ret);
+ break;
+ }
+
+ /* Check if this BO won't trigger the addressing bug. */
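+		/* The 0xf0000000 mask keeps only the 256MB window (the
+		 * top four address bits); if the first and last byte of
+		 * the BO share that window, every 28-bit offset inside
+		 * the BO resolves correctly.
+		 */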
+ if ((bo->base.paddr & 0xf0000000) ==
+ ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) {
+ vc4->bin_bo = bo;
+
+ /* Set up for allocating 512KB chunks of
+ * binner memory. The biggest allocation we
+ * need to do is for the initial tile alloc +
+ * tile state buffer. We can render to a
+ * maximum of ((2048*2048) / (32*32) = 4096
+ * tiles in a frame (until we do floating
+ * point rendering, at which point it would be
+ * 8192). Tile state is 48b/tile (rounded to
+ * a page), and tile alloc is 32b/tile
+ * (rounded to a page), plus a page of extra,
+ * for a total of 320kb for our worst-case.
+ * We choose 512kb so that it divides evenly
+ * into our 16MB, and the rest of the 512kb
+ * will be used as storage for the overflow
+ * from the initial 32b CL per bin.
+ */
+ vc4->bin_alloc_size = 512 * 1024;
+ vc4->bin_alloc_used = 0;
+ vc4->bin_alloc_overflow = 0;
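+			/* 16MB of bin BO / 512KB per slot = 32 slots,
+			 * which is why the WARN_ON_ONCE below ties the
+			 * slot count to the width of the bin_alloc_used
+			 * bitmask.
+			 */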
+ WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
+ bo->base.base.size / vc4->bin_alloc_size);
+
+ break;
+ }
+
+ /* Put it on the list to free later, and try again. */
+ list_add(&bo->unref_head, &list);
+ }
+
+ /* Free all the BOs we allocated but didn't choose. */
+ while (!list_empty(&list)) {
+ struct vc4_bo *bo = list_last_entry(&list,
+ struct vc4_bo, unref_head);
+
+ list_del(&bo->unref_head);
+ drm_gem_object_put_unlocked(&bo->base.base);
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_PM
+static int vc4_v3d_runtime_suspend(struct device *dev)
+{
+ struct vc4_v3d *v3d = dev_get_drvdata(dev);
+ struct vc4_dev *vc4 = v3d->vc4;
+
+ vc4_irq_uninstall(vc4->dev);
+
+ drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
+ vc4->bin_bo = NULL;
+
+ clk_disable_unprepare(v3d->clk);
+
+ return 0;
+}
+
+static int vc4_v3d_runtime_resume(struct device *dev)
+{
+ struct vc4_v3d *v3d = dev_get_drvdata(dev);
+ struct vc4_dev *vc4 = v3d->vc4;
+ int ret;
+
+ ret = vc4_allocate_bin_bo(vc4->dev);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(v3d->clk);
+ if (ret != 0)
+ return ret;
+
+ vc4_v3d_init_hw(vc4->dev);
+
+ /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */
+ enable_irq(vc4->dev->irq);
+ vc4_irq_postinstall(vc4->dev);
+
+ return 0;
+}
+#endif
+
+static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_v3d *v3d = NULL;
+ int ret;
+
+ v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
+ if (!v3d)
+ return -ENOMEM;
+
+ dev_set_drvdata(dev, v3d);
+
+ v3d->pdev = pdev;
+
+ v3d->regs = vc4_ioremap_regs(pdev, 0);
+ if (IS_ERR(v3d->regs))
+ return PTR_ERR(v3d->regs);
+
+ vc4->v3d = v3d;
+ v3d->vc4 = vc4;
+
+ v3d->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(v3d->clk)) {
+		ret = PTR_ERR(v3d->clk);
+ if (ret == -ENOENT) {
+ /* bcm2835 didn't have a clock reference in the DT. */
+ ret = 0;
+ v3d->clk = NULL;
+ } else {
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "Failed to get V3D clock: %d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
+ DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
+ V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0);
+ return -EINVAL;
+ }
+
+ ret = clk_prepare_enable(v3d->clk);
+ if (ret != 0)
+ return ret;
+
+ ret = vc4_allocate_bin_bo(drm);
+ if (ret) {
+ clk_disable_unprepare(v3d->clk);
+ return ret;
+ }
+
+ /* Reset the binner overflow address/size at setup, to be sure
+ * we don't reuse an old one.
+ */
+ V3D_WRITE(V3D_BPOA, 0);
+ V3D_WRITE(V3D_BPOS, 0);
+
+ vc4_v3d_init_hw(drm);
+
+ ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
+ if (ret) {
+ DRM_ERROR("Failed to install IRQ handler\n");
+ return ret;
+ }
+
+ pm_runtime_set_active(dev);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */
+ pm_runtime_enable(dev);
+
+ return 0;
+}
+
+static void vc4_v3d_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+
+ pm_runtime_disable(dev);
+
+ drm_irq_uninstall(drm);
+
+ /* Disable the binner's overflow memory address, so the next
+ * driver probe (if any) doesn't try to reuse our old
+ * allocation.
+ */
+ V3D_WRITE(V3D_BPOA, 0);
+ V3D_WRITE(V3D_BPOS, 0);
+
+ vc4->v3d = NULL;
+}
+
+static const struct dev_pm_ops vc4_v3d_pm_ops = {
+ SET_RUNTIME_PM_OPS(vc4_v3d_runtime_suspend, vc4_v3d_runtime_resume, NULL)
+};
+
+static const struct component_ops vc4_v3d_ops = {
+ .bind = vc4_v3d_bind,
+ .unbind = vc4_v3d_unbind,
+};
+
+static int vc4_v3d_dev_probe(struct platform_device *pdev)
+{
+ return component_add(&pdev->dev, &vc4_v3d_ops);
+}
+
+static int vc4_v3d_dev_remove(struct platform_device *pdev)
+{
+ component_del(&pdev->dev, &vc4_v3d_ops);
+ return 0;
+}
+
+static const struct of_device_id vc4_v3d_dt_match[] = {
+ { .compatible = "brcm,bcm2835-v3d" },
+ { .compatible = "brcm,cygnus-v3d" },
+ { .compatible = "brcm,vc4-v3d" },
+ {}
+};
+
+struct platform_driver vc4_v3d_driver = {
+ .probe = vc4_v3d_dev_probe,
+ .remove = vc4_v3d_dev_remove,
+ .driver = {
+ .name = "vc4_v3d",
+ .of_match_table = vc4_v3d_dt_match,
+ .pm = &vc4_v3d_pm_ops,
+ },
+};
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
new file mode 100644
index 000000000..eec76af49
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -0,0 +1,939 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Command list validator for VC4.
+ *
+ * Since the VC4 has no IOMMU between it and system memory, a user
+ * with access to execute command lists could escalate privilege by
+ * overwriting system memory (drawing to it as a framebuffer) or
+ * reading system memory it shouldn't (reading it as a vertex buffer
+ * or index buffer)
+ *
+ * We validate binner command lists to ensure that all accesses are
+ * within the bounds of the GEM objects referenced by the submitted
+ * job. It explicitly whitelists packets, and looks at the offsets in
+ * any address fields to make sure they're contained within the BOs
+ * they reference.
+ *
+ * Note that because CL validation is already reading the
+ * user-submitted CL and writing the validated copy out to the memory
+ * that the GPU will actually read, this is also where GEM relocation
+ * processing (turning BO references into actual addresses for the GPU
+ * to use) happens.
+ */
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_packet.h"
+
+#define VALIDATE_ARGS \
+ struct vc4_exec_info *exec, \
+ void *validated, \
+ void *untrusted
+
+/** Return the width in pixels of a 64-byte microtile. */
+static uint32_t
+utile_width(int cpp)
+{
+ switch (cpp) {
+ case 1:
+ case 2:
+ return 8;
+ case 4:
+ return 4;
+ case 8:
+ return 2;
+ default:
+ DRM_ERROR("unknown cpp: %d\n", cpp);
+ return 1;
+ }
+}
+
+/** Return the height in pixels of a 64-byte microtile. */
+static uint32_t
+utile_height(int cpp)
+{
+ switch (cpp) {
+ case 1:
+ return 8;
+ case 2:
+ case 4:
+ case 8:
+ return 4;
+ default:
+ DRM_ERROR("unknown cpp: %d\n", cpp);
+ return 1;
+ }
+}
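+
+/* Both switch tables above follow from a utile always being 64 bytes:
+ * width * height * cpp == 64 in every case (8x8 at cpp 1, 8x4 at
+ * cpp 2, 4x4 at cpp 4, 2x4 at cpp 8).
+ */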
+
+/**
+ * size_is_lt() - Returns whether a miplevel of the given size will
+ * use the lineartile (LT) tiling layout rather than the normal T
+ * tiling layout.
+ * @width: Width in pixels of the miplevel
+ * @height: Height in pixels of the miplevel
+ * @cpp: Bytes per pixel of the pixel format
+ */
+static bool
+size_is_lt(uint32_t width, uint32_t height, int cpp)
+{
+ return (width <= 4 * utile_width(cpp) ||
+ height <= 4 * utile_height(cpp));
+}
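+
+/* For example, with a 32bpp format (cpp == 4, so 4x4-pixel utiles),
+ * any miplevel whose width or height is at most 16 pixels is stored
+ * in the LT layout.
+ */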
+
+struct drm_gem_cma_object *
+vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
+{
+ struct drm_gem_cma_object *obj;
+ struct vc4_bo *bo;
+
+ if (hindex >= exec->bo_count) {
+ DRM_DEBUG("BO index %d greater than BO count %d\n",
+ hindex, exec->bo_count);
+ return NULL;
+ }
+ obj = exec->bo[hindex];
+ bo = to_vc4_bo(&obj->base);
+
+ if (bo->validated_shader) {
+ DRM_DEBUG("Trying to use shader BO as something other than "
+ "a shader\n");
+ return NULL;
+ }
+
+ return obj;
+}
+
+static struct drm_gem_cma_object *
+vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
+{
+ return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
+}
+
+static bool
+validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
+{
+ /* Note that the untrusted pointer passed to these functions is
+ * incremented past the packet byte.
+ */
+ return (untrusted - 1 == exec->bin_u + pos);
+}
+
+static uint32_t
+gl_shader_rec_size(uint32_t pointer_bits)
+{
+ uint32_t attribute_count = pointer_bits & 7;
+ bool extended = pointer_bits & 8;
+
+ if (attribute_count == 0)
+ attribute_count = 8;
+
+ if (extended)
+ return 100 + attribute_count * 4;
+ else
+ return 36 + attribute_count * 8;
+}
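+
+/* For example, a non-extended record using all 8 attributes is
+ * 36 + 8 * 8 = 100 bytes, and an extended one is 100 + 8 * 4 = 132
+ * bytes.
+ */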
+
+bool
+vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
+ uint32_t offset, uint8_t tiling_format,
+ uint32_t width, uint32_t height, uint8_t cpp)
+{
+ uint32_t aligned_width, aligned_height, stride, size;
+ uint32_t utile_w = utile_width(cpp);
+ uint32_t utile_h = utile_height(cpp);
+
+ /* The shaded vertex format stores signed 12.4 fixed point
+ * (-2048,2047) offsets from the viewport center, so we should
+ * never have a render target larger than 4096. The texture
+ * unit can only sample from 2048x2048, so it's even more
+ * restricted. This lets us avoid worrying about overflow in
+ * our math.
+ */
+ if (width > 4096 || height > 4096) {
+ DRM_DEBUG("Surface dimensions (%d,%d) too large",
+ width, height);
+ return false;
+ }
+
+ switch (tiling_format) {
+ case VC4_TILING_FORMAT_LINEAR:
+ aligned_width = round_up(width, utile_w);
+ aligned_height = height;
+ break;
+ case VC4_TILING_FORMAT_T:
+ aligned_width = round_up(width, utile_w * 8);
+ aligned_height = round_up(height, utile_h * 8);
+ break;
+ case VC4_TILING_FORMAT_LT:
+ aligned_width = round_up(width, utile_w);
+ aligned_height = round_up(height, utile_h);
+ break;
+ default:
+ DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format);
+ return false;
+ }
+
+ stride = aligned_width * cpp;
+ size = stride * aligned_height;
+
+ if (size + offset < size ||
+ size + offset > fbo->base.size) {
+ DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
+ width, height,
+ aligned_width, aligned_height,
+ size, offset, fbo->base.size);
+ return false;
+ }
+
+ return true;
+}
+
+static int
+validate_flush(VALIDATE_ARGS)
+{
+ if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
+ DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n");
+ return -EINVAL;
+ }
+ exec->found_flush = true;
+
+ return 0;
+}
+
+static int
+validate_start_tile_binning(VALIDATE_ARGS)
+{
+ if (exec->found_start_tile_binning_packet) {
+ DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n");
+ return -EINVAL;
+ }
+ exec->found_start_tile_binning_packet = true;
+
+ if (!exec->found_tile_binning_mode_config_packet) {
+ DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+validate_increment_semaphore(VALIDATE_ARGS)
+{
+ if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
+ DRM_DEBUG("Bin CL must end with "
+ "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ return -EINVAL;
+ }
+ exec->found_increment_semaphore_packet = true;
+
+ return 0;
+}
+
+static int
+validate_indexed_prim_list(VALIDATE_ARGS)
+{
+ struct drm_gem_cma_object *ib;
+ uint32_t length = *(uint32_t *)(untrusted + 1);
+ uint32_t offset = *(uint32_t *)(untrusted + 5);
+ uint32_t max_index = *(uint32_t *)(untrusted + 9);
+ uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
+ struct vc4_shader_state *shader_state;
+
+ /* Check overflow condition */
+ if (exec->shader_state_count == 0) {
+ DRM_DEBUG("shader state must precede primitives\n");
+ return -EINVAL;
+ }
+ shader_state = &exec->shader_state[exec->shader_state_count - 1];
+
+ if (max_index > shader_state->max_index)
+ shader_state->max_index = max_index;
+
+ ib = vc4_use_handle(exec, 0);
+ if (!ib)
+ return -EINVAL;
+
+ exec->bin_dep_seqno = max(exec->bin_dep_seqno,
+ to_vc4_bo(&ib->base)->write_seqno);
+
+ if (offset > ib->base.size ||
+ (ib->base.size - offset) / index_size < length) {
+ DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
+ offset, length, index_size, ib->base.size);
+ return -EINVAL;
+ }
+
+ *(uint32_t *)(validated + 5) = ib->paddr + offset;
+
+ return 0;
+}
+
+static int
+validate_gl_array_primitive(VALIDATE_ARGS)
+{
+ uint32_t length = *(uint32_t *)(untrusted + 1);
+ uint32_t base_index = *(uint32_t *)(untrusted + 5);
+ uint32_t max_index;
+ struct vc4_shader_state *shader_state;
+
+ /* Check overflow condition */
+ if (exec->shader_state_count == 0) {
+ DRM_DEBUG("shader state must precede primitives\n");
+ return -EINVAL;
+ }
+ shader_state = &exec->shader_state[exec->shader_state_count - 1];
+
+ if (length + base_index < length) {
+ DRM_DEBUG("primitive vertex count overflow\n");
+ return -EINVAL;
+ }
+ max_index = length + base_index - 1;
+
+ if (max_index > shader_state->max_index)
+ shader_state->max_index = max_index;
+
+ return 0;
+}
+
+static int
+validate_gl_shader_state(VALIDATE_ARGS)
+{
+ uint32_t i = exec->shader_state_count++;
+
+ if (i >= exec->shader_state_size) {
+ DRM_DEBUG("More requests for shader states than declared\n");
+ return -EINVAL;
+ }
+
+ exec->shader_state[i].addr = *(uint32_t *)untrusted;
+ exec->shader_state[i].max_index = 0;
+
+ if (exec->shader_state[i].addr & ~0xf) {
+ DRM_DEBUG("high bits set in GL shader rec reference\n");
+ return -EINVAL;
+ }
+
+ *(uint32_t *)validated = (exec->shader_rec_p +
+ exec->shader_state[i].addr);
+
+ exec->shader_rec_p +=
+ roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
+
+ return 0;
+}
+
+static int
+validate_tile_binning_config(VALIDATE_ARGS)
+{
+ struct drm_device *dev = exec->exec_bo->base.dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ uint8_t flags;
+ uint32_t tile_state_size;
+ uint32_t tile_count, bin_addr;
+ int bin_slot;
+
+ if (exec->found_tile_binning_mode_config_packet) {
+ DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
+ return -EINVAL;
+ }
+ exec->found_tile_binning_mode_config_packet = true;
+
+ exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
+ exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
+ tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
+ flags = *(uint8_t *)(untrusted + 14);
+
+ if (exec->bin_tiles_x == 0 ||
+ exec->bin_tiles_y == 0) {
+ DRM_DEBUG("Tile binning config of %dx%d too small\n",
+ exec->bin_tiles_x, exec->bin_tiles_y);
+ return -EINVAL;
+ }
+
+ if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
+ VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
+ DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags);
+ return -EINVAL;
+ }
+
+ bin_slot = vc4_v3d_get_bin_slot(vc4);
+ if (bin_slot < 0) {
+ if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) {
+ DRM_ERROR("Failed to allocate binner memory: %d\n",
+ bin_slot);
+ }
+ return bin_slot;
+ }
+
+ /* The slot we allocated will only be used by this job, and is
+ * free when the job completes rendering.
+ */
+ exec->bin_slots |= BIT(bin_slot);
+ bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size;
+
+ /* The tile state data array is 48 bytes per tile, and we put it at
+ * the start of a BO containing both it and the tile alloc.
+ */
+ tile_state_size = 48 * tile_count;
+
+ /* Since the tile alloc array will follow us, align. */
+ exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096);
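+	/* Illustration: a 30x17-tile bin config needs 48 * 510 = 24480
+	 * bytes of tile state, rounded up to 24576 here so that the
+	 * tile alloc array starts page-aligned.
+	 */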
+
+ *(uint8_t *)(validated + 14) =
+ ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
+ VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
+ VC4_BIN_CONFIG_AUTO_INIT_TSDA |
+ VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
+ VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
+ VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
+ VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
+
+ /* tile alloc address. */
+ *(uint32_t *)(validated + 0) = exec->tile_alloc_offset;
+ /* tile alloc size. */
+ *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size -
+ exec->tile_alloc_offset);
+ /* tile state address. */
+ *(uint32_t *)(validated + 8) = bin_addr;
+
+ return 0;
+}
+
+static int
+validate_gem_handles(VALIDATE_ARGS)
+{
+ memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
+ return 0;
+}
+
+#define VC4_DEFINE_PACKET(packet, func) \
+ [packet] = { packet ## _SIZE, #packet, func }
+
+static const struct cmd_info {
+ uint16_t len;
+ const char *name;
+ int (*func)(struct vc4_exec_info *exec, void *validated,
+ void *untrusted);
+} cmd_info[] = {
+ VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
+ validate_start_tile_binning),
+ VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
+ validate_increment_semaphore),
+
+ VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
+ validate_indexed_prim_list),
+ VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
+ validate_gl_array_primitive),
+
+ VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
+
+ VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
+
+ VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
+ VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
+ /* Note: The docs say this was also 105, but it was 106 in the
+ * initial userland code drop.
+ */
+ VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
+
+ VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
+ validate_tile_binning_config),
+
+ VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
+};
+
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+ void *validated,
+ void *unvalidated,
+ struct vc4_exec_info *exec)
+{
+ uint32_t len = exec->args->bin_cl_size;
+ uint32_t dst_offset = 0;
+ uint32_t src_offset = 0;
+
+ while (src_offset < len) {
+ void *dst_pkt = validated + dst_offset;
+ void *src_pkt = unvalidated + src_offset;
+ u8 cmd = *(uint8_t *)src_pkt;
+ const struct cmd_info *info;
+
+ if (cmd >= ARRAY_SIZE(cmd_info)) {
+ DRM_DEBUG("0x%08x: packet %d out of bounds\n",
+ src_offset, cmd);
+ return -EINVAL;
+ }
+
+ info = &cmd_info[cmd];
+ if (!info->name) {
+ DRM_DEBUG("0x%08x: packet %d invalid\n",
+ src_offset, cmd);
+ return -EINVAL;
+ }
+
+ if (src_offset + info->len > len) {
+ DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x "
+ "exceeds bounds (0x%08x)\n",
+ src_offset, cmd, info->name, info->len,
+ src_offset + len);
+ return -EINVAL;
+ }
+
+ if (cmd != VC4_PACKET_GEM_HANDLES)
+ memcpy(dst_pkt, src_pkt, info->len);
+
+ if (info->func && info->func(exec,
+ dst_pkt + 1,
+ src_pkt + 1)) {
+ DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n",
+ src_offset, cmd, info->name);
+ return -EINVAL;
+ }
+
+ src_offset += info->len;
+ /* GEM handle loading doesn't produce HW packets. */
+ if (cmd != VC4_PACKET_GEM_HANDLES)
+ dst_offset += info->len;
+
+ /* When the CL hits halt, it'll stop reading anything else. */
+ if (cmd == VC4_PACKET_HALT)
+ break;
+ }
+
+ exec->ct0ea = exec->ct0ca + dst_offset;
+
+ if (!exec->found_start_tile_binning_packet) {
+ DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
+ return -EINVAL;
+ }
+
+ /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
+ * semaphore is used to trigger the render CL to start up, and the
+ * FLUSH is what caps the bin lists with
+ * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
+ * render CL when they get called to) and actually triggers the queued
+ * semaphore increment.
+ */
+ if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
+ DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
+ "VC4_PACKET_FLUSH\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static bool
+reloc_tex(struct vc4_exec_info *exec,
+ void *uniform_data_u,
+ struct vc4_texture_sample_info *sample,
+ uint32_t texture_handle_index, bool is_cs)
+{
+ struct drm_gem_cma_object *tex;
+ uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
+ uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
+ uint32_t p2 = (sample->p_offset[2] != ~0 ?
+ *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
+ uint32_t p3 = (sample->p_offset[3] != ~0 ?
+ *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
+ uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
+ uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
+ uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
+ uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
+ uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
+ uint32_t cpp, tiling_format, utile_w, utile_h;
+ uint32_t i;
+ uint32_t cube_map_stride = 0;
+ enum vc4_texture_data_type type;
+
+ tex = vc4_use_bo(exec, texture_handle_index);
+ if (!tex)
+ return false;
+
+ if (sample->is_direct) {
+ uint32_t remaining_size = tex->base.size - p0;
+
+ if (p0 > tex->base.size - 4) {
+ DRM_DEBUG("UBO offset greater than UBO size\n");
+ goto fail;
+ }
+ if (p1 > remaining_size - 4) {
+ DRM_DEBUG("UBO clamp would allow reads "
+ "outside of UBO\n");
+ goto fail;
+ }
+ *validated_p0 = tex->paddr + p0;
+ return true;
+ }
+
+ if (width == 0)
+ width = 2048;
+ if (height == 0)
+ height = 2048;
+
+ if (p0 & VC4_TEX_P0_CMMODE_MASK) {
+ if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
+ VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
+ cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
+ if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
+ VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
+ if (cube_map_stride) {
+ DRM_DEBUG("Cube map stride set twice\n");
+ goto fail;
+ }
+
+ cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
+ }
+ if (!cube_map_stride) {
+ DRM_DEBUG("Cube map stride not set\n");
+ goto fail;
+ }
+ }
+
+ type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
+ (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
+
+ switch (type) {
+ case VC4_TEXTURE_TYPE_RGBA8888:
+ case VC4_TEXTURE_TYPE_RGBX8888:
+ case VC4_TEXTURE_TYPE_RGBA32R:
+ cpp = 4;
+ break;
+ case VC4_TEXTURE_TYPE_RGBA4444:
+ case VC4_TEXTURE_TYPE_RGBA5551:
+ case VC4_TEXTURE_TYPE_RGB565:
+ case VC4_TEXTURE_TYPE_LUMALPHA:
+ case VC4_TEXTURE_TYPE_S16F:
+ case VC4_TEXTURE_TYPE_S16:
+ cpp = 2;
+ break;
+ case VC4_TEXTURE_TYPE_LUMINANCE:
+ case VC4_TEXTURE_TYPE_ALPHA:
+ case VC4_TEXTURE_TYPE_S8:
+ cpp = 1;
+ break;
+ case VC4_TEXTURE_TYPE_ETC1:
+ /* ETC1 is arranged as 64-bit blocks, where each block is 4x4
+ * pixels.
+ */
+ cpp = 8;
+ width = (width + 3) >> 2;
+ height = (height + 3) >> 2;
+ break;
+ case VC4_TEXTURE_TYPE_BW1:
+ case VC4_TEXTURE_TYPE_A4:
+ case VC4_TEXTURE_TYPE_A1:
+ case VC4_TEXTURE_TYPE_RGBA64:
+ case VC4_TEXTURE_TYPE_YUV422R:
+ default:
+ DRM_DEBUG("Texture format %d unsupported\n", type);
+ goto fail;
+ }
+ utile_w = utile_width(cpp);
+ utile_h = utile_height(cpp);
+
+ if (type == VC4_TEXTURE_TYPE_RGBA32R) {
+ tiling_format = VC4_TILING_FORMAT_LINEAR;
+ } else {
+ if (size_is_lt(width, height, cpp))
+ tiling_format = VC4_TILING_FORMAT_LT;
+ else
+ tiling_format = VC4_TILING_FORMAT_T;
+ }
+
+ if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
+ tiling_format, width, height, cpp)) {
+ goto fail;
+ }
+
+ /* The mipmap levels are stored before the base of the texture. Make
+ * sure there is actually space in the BO.
+ */
+ for (i = 1; i <= miplevels; i++) {
+ uint32_t level_width = max(width >> i, 1u);
+ uint32_t level_height = max(height >> i, 1u);
+ uint32_t aligned_width, aligned_height;
+ uint32_t level_size;
+
+ /* Once the levels get small enough, they drop from T to LT. */
+ if (tiling_format == VC4_TILING_FORMAT_T &&
+ size_is_lt(level_width, level_height, cpp)) {
+ tiling_format = VC4_TILING_FORMAT_LT;
+ }
+
+ switch (tiling_format) {
+ case VC4_TILING_FORMAT_T:
+ aligned_width = round_up(level_width, utile_w * 8);
+ aligned_height = round_up(level_height, utile_h * 8);
+ break;
+ case VC4_TILING_FORMAT_LT:
+ aligned_width = round_up(level_width, utile_w);
+ aligned_height = round_up(level_height, utile_h);
+ break;
+ default:
+ aligned_width = round_up(level_width, utile_w);
+ aligned_height = level_height;
+ break;
+ }
+
+ level_size = aligned_width * cpp * aligned_height;
+
+ if (offset < level_size) {
+ DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db "
+ "overflowed buffer bounds (offset %d)\n",
+ i, level_width, level_height,
+ aligned_width, aligned_height,
+ level_size, offset);
+ goto fail;
+ }
+
+ offset -= level_size;
+ }
+
+ *validated_p0 = tex->paddr + p0;
+
+ if (is_cs) {
+ exec->bin_dep_seqno = max(exec->bin_dep_seqno,
+ to_vc4_bo(&tex->base)->write_seqno);
+ }
+
+ return true;
+ fail:
+ DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
+ DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
+ DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
+ DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
+ return false;
+}
+
+static int
+validate_gl_shader_rec(struct drm_device *dev,
+ struct vc4_exec_info *exec,
+ struct vc4_shader_state *state)
+{
+ uint32_t *src_handles;
+ void *pkt_u, *pkt_v;
+ static const uint32_t shader_reloc_offsets[] = {
+ 4, /* fs */
+ 16, /* vs */
+ 28, /* cs */
+ };
+ uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
+ struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
+ uint32_t nr_attributes, nr_relocs, packet_size;
+ int i;
+
+ nr_attributes = state->addr & 0x7;
+ if (nr_attributes == 0)
+ nr_attributes = 8;
+ packet_size = gl_shader_rec_size(state->addr);
+
+ nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
+ if (nr_relocs * 4 > exec->shader_rec_size) {
+ DRM_DEBUG("overflowed shader recs reading %d handles "
+ "from %d bytes left\n",
+ nr_relocs, exec->shader_rec_size);
+ return -EINVAL;
+ }
+ src_handles = exec->shader_rec_u;
+ exec->shader_rec_u += nr_relocs * 4;
+ exec->shader_rec_size -= nr_relocs * 4;
+
+ if (packet_size > exec->shader_rec_size) {
+ DRM_DEBUG("overflowed shader recs copying %db packet "
+ "from %d bytes left\n",
+ packet_size, exec->shader_rec_size);
+ return -EINVAL;
+ }
+ pkt_u = exec->shader_rec_u;
+ pkt_v = exec->shader_rec_v;
+ memcpy(pkt_v, pkt_u, packet_size);
+ exec->shader_rec_u += packet_size;
+ /* Shader recs have to be aligned to 16 bytes (due to the attribute
+ * flags being in the low bytes), so round the next validated shader
+ * rec address up. This should be safe, since we've got so many
+ * relocations in a shader rec packet.
+ */
+ BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
+ exec->shader_rec_v += roundup(packet_size, 16);
+ exec->shader_rec_size -= packet_size;
+
+ for (i = 0; i < shader_reloc_count; i++) {
+ if (src_handles[i] > exec->bo_count) {
+ DRM_DEBUG("Shader handle %d too big\n", src_handles[i]);
+ return -EINVAL;
+ }
+
+ bo[i] = exec->bo[src_handles[i]];
+ if (!bo[i])
+ return -EINVAL;
+ }
+ for (i = shader_reloc_count; i < nr_relocs; i++) {
+ bo[i] = vc4_use_bo(exec, src_handles[i]);
+ if (!bo[i])
+ return -EINVAL;
+ }
+
+ if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
+ to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
+ DRM_DEBUG("Thread mode of CL and FS do not match\n");
+ return -EINVAL;
+ }
+
+ if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
+ to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
+ DRM_DEBUG("cs and vs cannot be threaded\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < shader_reloc_count; i++) {
+ struct vc4_validated_shader_info *validated_shader;
+ uint32_t o = shader_reloc_offsets[i];
+ uint32_t src_offset = *(uint32_t *)(pkt_u + o);
+ uint32_t *texture_handles_u;
+ void *uniform_data_u;
+ uint32_t tex, uni;
+
+ *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
+
+ if (src_offset != 0) {
+ DRM_DEBUG("Shaders must be at offset 0 of "
+ "the BO.\n");
+ return -EINVAL;
+ }
+
+ validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
+ if (!validated_shader)
+ return -EINVAL;
+
+ if (validated_shader->uniforms_src_size >
+ exec->uniforms_size) {
+ DRM_DEBUG("Uniforms src buffer overflow\n");
+ return -EINVAL;
+ }
+
+ texture_handles_u = exec->uniforms_u;
+ uniform_data_u = (texture_handles_u +
+ validated_shader->num_texture_samples);
+
+ memcpy(exec->uniforms_v, uniform_data_u,
+ validated_shader->uniforms_size);
+
+ for (tex = 0;
+ tex < validated_shader->num_texture_samples;
+ tex++) {
+ if (!reloc_tex(exec,
+ uniform_data_u,
+ &validated_shader->texture_samples[tex],
+ texture_handles_u[tex],
+ i == 2)) {
+ return -EINVAL;
+ }
+ }
+
+ /* Fill in the uniform slots that need this shader's
+ * start-of-uniforms address (used for resetting the uniform
+ * stream in the presence of control flow).
+ */
+ for (uni = 0;
+ uni < validated_shader->num_uniform_addr_offsets;
+ uni++) {
+ uint32_t o = validated_shader->uniform_addr_offsets[uni];
+ ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
+ }
+
+ *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
+
+ exec->uniforms_u += validated_shader->uniforms_src_size;
+ exec->uniforms_v += validated_shader->uniforms_size;
+ exec->uniforms_p += validated_shader->uniforms_size;
+ }
+
+ for (i = 0; i < nr_attributes; i++) {
+ struct drm_gem_cma_object *vbo =
+ bo[ARRAY_SIZE(shader_reloc_offsets) + i];
+ uint32_t o = 36 + i * 8;
+ uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
+ uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
+ uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
+ uint32_t max_index;
+
+ exec->bin_dep_seqno = max(exec->bin_dep_seqno,
+ to_vc4_bo(&vbo->base)->write_seqno);
+
+ if (state->addr & 0x8)
+ stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
+
+ if (vbo->base.size < offset ||
+ vbo->base.size - offset < attr_size) {
+ DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n",
+ offset, attr_size, vbo->base.size);
+ return -EINVAL;
+ }
+
+ if (stride != 0) {
+ max_index = ((vbo->base.size - offset - attr_size) /
+ stride);
+ if (state->max_index > max_index) {
+ DRM_DEBUG("primitives use index %d out of "
+ "supplied %d\n",
+ state->max_index, max_index);
+ return -EINVAL;
+ }
+ }
+
+ *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
+ }
+
+ return 0;
+}
+
+int
+vc4_validate_shader_recs(struct drm_device *dev,
+ struct vc4_exec_info *exec)
+{
+ uint32_t i;
+ int ret = 0;
+
+ for (i = 0; i < exec->shader_state_count; i++) {
+ ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
new file mode 100644
index 000000000..7cf82b071
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Shader validator for VC4.
+ *
+ * Since the VC4 has no IOMMU between it and system memory, a user
+ * with access to execute shaders could escalate privilege by
+ * overwriting system memory (using the VPM write address register in
+ * the general-purpose DMA mode) or reading system memory it shouldn't
+ * (reading it as a texture, uniform data, or direct-addressed TMU
+ * lookup).
+ *
+ * The shader validator walks over a shader's BO, ensuring that its
+ * accesses are appropriately bounded, and recording where texture
+ * accesses are made so that we can do relocations for them in the
+ * uniform stream.
+ *
+ * Shader BOs are immutable for their lifetimes (enforced by not
+ * allowing mmaps, GEM prime export, or rendering to from a CL), so
+ * this validation is only performed at BO creation time.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_qpu_defines.h"
+
+#define LIVE_REG_COUNT (32 + 32 + 4)
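+
+/* Layout of the LIVE_REG_COUNT-sized tracking arrays below: indices
+ * 0-31 cover regfile A, 32-63 regfile B, and 64-67 the accumulators
+ * r0-r3 (see waddr_to_live_reg_index()).
+ */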
+
+struct vc4_shader_validation_state {
+ /* Current IP being validated. */
+ uint32_t ip;
+
+ /* IP at the end of the BO, do not read shader[max_ip] */
+ uint32_t max_ip;
+
+ uint64_t *shader;
+
+ struct vc4_texture_sample_info tmu_setup[2];
+ int tmu_write_count[2];
+
+ /* For registers that were last written to by a MIN instruction with
+ * one argument being a uniform, the address of the uniform.
+ * Otherwise, ~0.
+ *
+ * This is used for the validation of direct address memory reads.
+ */
+ uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
+ bool live_max_clamp_regs[LIVE_REG_COUNT];
+ uint32_t live_immediates[LIVE_REG_COUNT];
+
+ /* Bitfield of which IPs are used as branch targets.
+ *
+ * Used for validation that the uniform stream is updated at the right
+ * points and clearing the texturing/clamping state.
+ */
+ unsigned long *branch_targets;
+
+ /* Set when entering a basic block, and cleared when the uniform
+ * address update is found. This is used to make sure that we don't
+ * read uniforms when the address is undefined.
+ */
+ bool needs_uniform_address_update;
+
+ /* Set when we find a backwards branch. If the branch is backwards,
+	 * the target is probably doing an address reset to read uniforms,
+ * and so we need to be sure that a uniforms address is present in the
+ * stream, even if the shader didn't need to read uniforms in later
+ * basic blocks.
+ */
+ bool needs_uniform_address_for_loop;
+
+ /* Set when we find an instruction writing the top half of the
+	 * register files. If we allowed writing the unusable regs in
+	 * a threaded shader, the clamp validation done for the other
+	 * shader running on our QPU would be invalid.
+ */
+ bool all_registers_used;
+};
+
+static uint32_t
+waddr_to_live_reg_index(uint32_t waddr, bool is_b)
+{
+ if (waddr < 32) {
+ if (is_b)
+ return 32 + waddr;
+ else
+ return waddr;
+ } else if (waddr <= QPU_W_ACC3) {
+ return 64 + waddr - QPU_W_ACC0;
+ } else {
+ return ~0;
+ }
+}
+
+static uint32_t
+raddr_add_a_to_live_reg_index(uint64_t inst)
+{
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+ uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
+ uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+ uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+
+ if (add_a == QPU_MUX_A)
+ return raddr_a;
+ else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
+ return 32 + raddr_b;
+ else if (add_a <= QPU_MUX_R3)
+ return 64 + add_a;
+ else
+ return ~0;
+}
+
+static bool
+live_reg_is_upper_half(uint32_t lri)
+{
+ return (lri >= 16 && lri < 32) ||
+ (lri >= 32 + 16 && lri < 32 + 32);
+}
+
+static bool
+is_tmu_submit(uint32_t waddr)
+{
+ return (waddr == QPU_W_TMU0_S ||
+ waddr == QPU_W_TMU1_S);
+}
+
+static bool
+is_tmu_write(uint32_t waddr)
+{
+ return (waddr >= QPU_W_TMU0_S &&
+ waddr <= QPU_W_TMU1_B);
+}
+
+static bool
+record_texture_sample(struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state,
+ int tmu)
+{
+ uint32_t s = validated_shader->num_texture_samples;
+ int i;
+ struct vc4_texture_sample_info *temp_samples;
+
+ temp_samples = krealloc(validated_shader->texture_samples,
+ (s + 1) * sizeof(*temp_samples),
+ GFP_KERNEL);
+ if (!temp_samples)
+ return false;
+
+ memcpy(&temp_samples[s],
+ &validation_state->tmu_setup[tmu],
+ sizeof(*temp_samples));
+
+ validated_shader->num_texture_samples = s + 1;
+ validated_shader->texture_samples = temp_samples;
+
+ for (i = 0; i < 4; i++)
+ validation_state->tmu_setup[tmu].p_offset[i] = ~0;
+
+ return true;
+}
+
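+/* Validates a write to one of the TMU S/T/R/B setup registers: records the
+ * uniform stream offset of each parameter (so reloc_tex() in vc4_validate.c
+ * can patch in real addresses), distinguishes direct-addressed reads from
+ * normal texturing, and flushes the accumulated sample when the S (submit)
+ * register is written.
+ */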
+static bool
+check_tmu_write(struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state,
+ bool is_mul)
+{
+ uint64_t inst = validation_state->shader[validation_state->ip];
+ uint32_t waddr = (is_mul ?
+ QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
+ QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+ uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+ uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+ int tmu = waddr > QPU_W_TMU0_B;
+ bool submit = is_tmu_submit(waddr);
+ bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+ if (is_direct) {
+ uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+ uint32_t clamp_reg, clamp_offset;
+
+ if (sig == QPU_SIG_SMALL_IMM) {
+ DRM_DEBUG("direct TMU read used small immediate\n");
+ return false;
+ }
+
+ /* Make sure that this texture load is an add of the base
+ * address of the UBO to a clamped offset within the UBO.
+ */
+ if (is_mul ||
+ QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+ DRM_DEBUG("direct TMU load wasn't an add\n");
+ return false;
+ }
+
+ /* We assert that the clamped address is the first
+ * argument, and the UBO base address is the second argument.
+ * This is arbitrary, but simpler than supporting either
+ * ordering.
+ */
+ clamp_reg = raddr_add_a_to_live_reg_index(inst);
+ if (clamp_reg == ~0) {
+ DRM_DEBUG("direct TMU load wasn't clamped\n");
+ return false;
+ }
+
+ clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
+ if (clamp_offset == ~0) {
+ DRM_DEBUG("direct TMU load wasn't clamped\n");
+ return false;
+ }
+
+ /* Store the clamp value's offset in p1 (see reloc_tex() in
+ * vc4_validate.c).
+ */
+ validation_state->tmu_setup[tmu].p_offset[1] =
+ clamp_offset;
+
+ if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+ !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+ DRM_DEBUG("direct TMU load didn't add to a uniform\n");
+ return false;
+ }
+
+ validation_state->tmu_setup[tmu].is_direct = true;
+ } else {
+ if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
+ raddr_b == QPU_R_UNIF)) {
+ DRM_DEBUG("uniform read in the same instruction as "
+ "texture setup.\n");
+ return false;
+ }
+ }
+
+ if (validation_state->tmu_write_count[tmu] >= 4) {
+ DRM_DEBUG("TMU%d got too many parameters before dispatch\n",
+ tmu);
+ return false;
+ }
+ validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
+ validated_shader->uniforms_size;
+ validation_state->tmu_write_count[tmu]++;
+ /* Since direct uses a RADDR uniform reference, it will get counted in
+ * check_instruction_reads()
+ */
+ if (!is_direct) {
+ if (validation_state->needs_uniform_address_update) {
+ DRM_DEBUG("Texturing with undefined uniform address\n");
+ return false;
+ }
+
+ validated_shader->uniforms_size += 4;
+ }
+
+ if (submit) {
+ if (!record_texture_sample(validated_shader,
+ validation_state, tmu)) {
+ return false;
+ }
+
+ validation_state->tmu_write_count[tmu] = 0;
+ }
+
+ return true;
+}
+
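+/* Marks the current position in the uniform stream as one that must contain
+ * a uniforms-address value, by appending the uniform index to
+ * uniform_addr_offsets.
+ */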
+static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
+{
+ uint32_t o = validated_shader->num_uniform_addr_offsets;
+ uint32_t num_uniforms = validated_shader->uniforms_size / 4;
+
+ validated_shader->uniform_addr_offsets =
+ krealloc(validated_shader->uniform_addr_offsets,
+ (o + 1) *
+ sizeof(*validated_shader->uniform_addr_offsets),
+ GFP_KERNEL);
+ if (!validated_shader->uniform_addr_offsets)
+ return false;
+
+ validated_shader->uniform_addr_offsets[o] = num_uniforms;
+ validated_shader->num_uniform_addr_offsets++;
+
+ return true;
+}
+
+static bool
+validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state,
+ bool is_mul)
+{
+ uint64_t inst = validation_state->shader[validation_state->ip];
+ u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+ u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+ u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+ u32 add_lri = raddr_add_a_to_live_reg_index(inst);
+ /* We want our reset to be pointing at whatever uniform follows the
+ * uniforms base address.
+ */
+ u32 expected_offset = validated_shader->uniforms_size + 4;
+
+ /* We only support absolute uniform address changes, and we
+ * require that they be in the current basic block before any
+ * of its uniform reads.
+ *
+ * One could potentially emit more efficient QPU code, by
+ * noticing that (say) an if statement does uniform control
+ * flow for all threads and that the if reads the same number
+ * of uniforms on each side. However, this scheme is easy to
+ * validate so it's all we allow for now.
+ */
+ switch (QPU_GET_FIELD(inst, QPU_SIG)) {
+ case QPU_SIG_NONE:
+ case QPU_SIG_SCOREBOARD_UNLOCK:
+ case QPU_SIG_COLOR_LOAD:
+ case QPU_SIG_LOAD_TMU0:
+ case QPU_SIG_LOAD_TMU1:
+ break;
+ default:
+ DRM_DEBUG("uniforms address change must be "
+ "normal math\n");
+ return false;
+ }
+
+ if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+ DRM_DEBUG("Uniform address reset must be an ADD.\n");
+ return false;
+ }
+
+ if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
+ DRM_DEBUG("Uniform address reset must be unconditional.\n");
+ return false;
+ }
+
+ if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
+ !(inst & QPU_PM)) {
+ DRM_DEBUG("No packing allowed on uniforms reset\n");
+ return false;
+ }
+
+ if (add_lri == -1) {
+ DRM_DEBUG("First argument of uniform address write must be "
+ "an immediate value.\n");
+ return false;
+ }
+
+ if (validation_state->live_immediates[add_lri] != expected_offset) {
+ DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n",
+ validation_state->live_immediates[add_lri],
+ expected_offset);
+ return false;
+ }
+
+ if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+ !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+ DRM_DEBUG("Second argument of uniform address write must be "
+ "a uniform.\n");
+ return false;
+ }
+
+ validation_state->needs_uniform_address_update = false;
+ validation_state->needs_uniform_address_for_loop = false;
+ return require_uniform_address_uniform(validated_shader);
+}
+
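+/* Validates the destination register of either the ADD or the MUL half of an
+ * instruction, tracking live immediates and rejecting writes to registers we
+ * can't or don't validate (general VPM DMA, host interrupt, etc.).
+ */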
+static bool
+check_reg_write(struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state,
+ bool is_mul)
+{
+ uint64_t inst = validation_state->shader[validation_state->ip];
+ uint32_t waddr = (is_mul ?
+ QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
+ QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+ bool ws = inst & QPU_WS;
+ bool is_b = is_mul ^ ws;
+ u32 lri = waddr_to_live_reg_index(waddr, is_b);
+
+ if (lri != -1) {
+ uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+ uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
+
+ if (sig == QPU_SIG_LOAD_IMM &&
+ QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
+ ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
+ (!is_mul && cond_add == QPU_COND_ALWAYS))) {
+ validation_state->live_immediates[lri] =
+ QPU_GET_FIELD(inst, QPU_LOAD_IMM);
+ } else {
+ validation_state->live_immediates[lri] = ~0;
+ }
+
+ if (live_reg_is_upper_half(lri))
+ validation_state->all_registers_used = true;
+ }
+
+ switch (waddr) {
+ case QPU_W_UNIFORMS_ADDRESS:
+ if (is_b) {
+ DRM_DEBUG("relative uniforms address change "
+ "unsupported\n");
+ return false;
+ }
+
+ return validate_uniform_address_write(validated_shader,
+ validation_state,
+ is_mul);
+
+ case QPU_W_TLB_COLOR_MS:
+ case QPU_W_TLB_COLOR_ALL:
+ case QPU_W_TLB_Z:
+ /* These only interact with the tile buffer, not main memory,
+ * so they're safe.
+ */
+ return true;
+
+ case QPU_W_TMU0_S:
+ case QPU_W_TMU0_T:
+ case QPU_W_TMU0_R:
+ case QPU_W_TMU0_B:
+ case QPU_W_TMU1_S:
+ case QPU_W_TMU1_T:
+ case QPU_W_TMU1_R:
+ case QPU_W_TMU1_B:
+ return check_tmu_write(validated_shader, validation_state,
+ is_mul);
+
+ case QPU_W_HOST_INT:
+ case QPU_W_TMU_NOSWAP:
+ case QPU_W_TLB_ALPHA_MASK:
+ case QPU_W_MUTEX_RELEASE:
+ /* XXX: I haven't thought about these, so don't support them
+ * for now.
+ */
+ DRM_DEBUG("Unsupported waddr %d\n", waddr);
+ return false;
+
+ case QPU_W_VPM_ADDR:
+ DRM_DEBUG("General VPM DMA unsupported\n");
+ return false;
+
+ case QPU_W_VPM:
+ case QPU_W_VPMVCD_SETUP:
+ /* We allow VPM setup in general, even including VPM DMA
+ * configuration setup, because the (unsafe) DMA can only be
+ * triggered by QPU_W_VPM_ADDR writes.
+ */
+ return true;
+
+ case QPU_W_TLB_STENCIL_SETUP:
+ return true;
+ }
+
+ return true;
+}
+
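+/* Tracks the clamp sequence that direct-addressed TMU reads depend on: a
+ * register written by MAX(x, 0) is flagged in live_max_clamp_regs, and a
+ * following MIN of that value against a uniform records the uniform's offset
+ * in live_min_clamp_offsets for check_tmu_write() to consume.
+ */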
+static void
+track_live_clamps(struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state)
+{
+ uint64_t inst = validation_state->shader[validation_state->ip];
+ uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
+ uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+ uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+ uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+ uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
+ uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+ uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+ uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+ bool ws = inst & QPU_WS;
+ uint32_t lri_add_a, lri_add, lri_mul;
+ bool add_a_is_min_0;
+
+ /* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
+ * before we clear previous live state.
+ */
+ lri_add_a = raddr_add_a_to_live_reg_index(inst);
+ add_a_is_min_0 = (lri_add_a != ~0 &&
+ validation_state->live_max_clamp_regs[lri_add_a]);
+
+ /* Clear live state for registers written by our instruction. */
+ lri_add = waddr_to_live_reg_index(waddr_add, ws);
+ lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
+ if (lri_mul != ~0) {
+ validation_state->live_max_clamp_regs[lri_mul] = false;
+ validation_state->live_min_clamp_offsets[lri_mul] = ~0;
+ }
+ if (lri_add != ~0) {
+ validation_state->live_max_clamp_regs[lri_add] = false;
+ validation_state->live_min_clamp_offsets[lri_add] = ~0;
+ } else {
+ /* Nothing further to do for live tracking, since only ADDs
+ * generate new live clamp registers.
+ */
+ return;
+ }
+
+ /* Now, handle remaining live clamp tracking for the ADD operation. */
+
+ if (cond_add != QPU_COND_ALWAYS)
+ return;
+
+ if (op_add == QPU_A_MAX) {
+ /* Track live clamps of a value to a minimum of 0 (in either
+ * arg).
+ */
+ if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
+ (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
+ return;
+ }
+
+ validation_state->live_max_clamp_regs[lri_add] = true;
+ } else if (op_add == QPU_A_MIN) {
+ /* Track live clamps of a value clamped to a minimum of 0 and
+ * a maximum of some uniform's offset.
+ */
+ if (!add_a_is_min_0)
+ return;
+
+ if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+ !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
+ sig != QPU_SIG_SMALL_IMM)) {
+ return;
+ }
+
+ validation_state->live_min_clamp_offsets[lri_add] =
+ validated_shader->uniforms_size;
+ }
+}
+
+static bool
+check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state)
+{
+ uint64_t inst = validation_state->shader[validation_state->ip];
+ uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+ uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+ bool ok;
+
+ if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
+ DRM_DEBUG("ADD and MUL both set up textures\n");
+ return false;
+ }
+
+ ok = (check_reg_write(validated_shader, validation_state, false) &&
+ check_reg_write(validated_shader, validation_state, true));
+
+ track_live_clamps(validated_shader, validation_state);
+
+ return ok;
+}
+
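+/* Per-instruction branch checks: a backwards branch (negative immediate) is
+ * treated as a loop, so a uniforms address reset must remain available in
+ * the stream, and branches that also write registers are rejected. Branch
+ * targets themselves are validated up front in vc4_validate_branches().
+ */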
+static bool
+check_branch(uint64_t inst,
+ struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state,
+ int ip)
+{
+ int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+ uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+ uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+
+ if ((int)branch_imm < 0)
+ validation_state->needs_uniform_address_for_loop = true;
+
+ /* We don't want to have to worry about validation of this, and
+ * there's no need for it.
+ */
+ if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
+ DRM_DEBUG("branch instruction at %d wrote a register.\n",
+ validation_state->ip);
+ return false;
+ }
+
+ return true;
+}
+
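+/* Accounts for uniform reads (each consumes 4 bytes of the uniform stream,
+ * and must not happen while the uniforms address is undefined) and notes
+ * reads of the upper register file halves for the threaded-shader check.
+ */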
+static bool
+check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state)
+{
+ uint64_t inst = validation_state->shader[validation_state->ip];
+ uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+ uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+ if (raddr_a == QPU_R_UNIF ||
+ (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
+ /* This can't overflow the uint32_t, because we read 8 bytes
+ * of instruction for every 4 bytes we add here, so the shader
+ * BO would have run out of memory first.
+ */
+ validated_shader->uniforms_size += 4;
+
+ if (validation_state->needs_uniform_address_update) {
+ DRM_DEBUG("Uniform read with undefined uniform "
+ "address\n");
+ return false;
+ }
+ }
+
+ if ((raddr_a >= 16 && raddr_a < 32) ||
+ (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
+ validation_state->all_registers_used = true;
+ }
+
+ return true;
+}
+
+/* Make sure that all branches are absolute and point within the shader, and
+ * note their targets for later.
+ */
+static bool
+vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
+{
+ uint32_t max_branch_target = 0;
+ int ip;
+ int last_branch = -2;
+
+ for (ip = 0; ip < validation_state->max_ip; ip++) {
+ uint64_t inst = validation_state->shader[ip];
+ int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+ uint32_t after_delay_ip = ip + 4;
+ uint32_t branch_target_ip;
+
+ if (sig == QPU_SIG_PROG_END) {
+ /* There are two delay slots after program end is
+ * signaled that are still executed, then we're
+ * finished. validation_state->max_ip is the
+ * instruction after the last valid instruction in the
+ * program.
+ */
+ validation_state->max_ip = ip + 3;
+ continue;
+ }
+
+ if (sig != QPU_SIG_BRANCH)
+ continue;
+
+ if (ip - last_branch < 4) {
+ DRM_DEBUG("Branch at %d during delay slots\n", ip);
+ return false;
+ }
+ last_branch = ip;
+
+ if (inst & QPU_BRANCH_REG) {
+ DRM_DEBUG("branching from register relative "
+ "not supported\n");
+ return false;
+ }
+
+ if (!(inst & QPU_BRANCH_REL)) {
+ DRM_DEBUG("relative branching required\n");
+ return false;
+ }
+
+ /* The actual branch target is the instruction after the delay
+ * slots, plus whatever byte offset is in the low 32 bits of
+ * the instruction. Make sure we're not branching beyond the
+ * end of the shader object.
+ */
+ if (branch_imm % sizeof(inst) != 0) {
+ DRM_DEBUG("branch target not aligned\n");
+ return false;
+ }
+
+ branch_target_ip = after_delay_ip + (branch_imm >> 3);
+ if (branch_target_ip >= validation_state->max_ip) {
+ DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n",
+ ip, branch_target_ip,
+ validation_state->max_ip);
+ return false;
+ }
+ set_bit(branch_target_ip, validation_state->branch_targets);
+
+ /* Make sure that the non-branching path is also not outside
+ * the shader.
+ */
+ if (after_delay_ip >= validation_state->max_ip) {
+ DRM_DEBUG("Branch at %d continues past shader end "
+ "(%d/%d)\n",
+ ip, after_delay_ip, validation_state->max_ip);
+ return false;
+ }
+ set_bit(after_delay_ip, validation_state->branch_targets);
+ max_branch_target = max(max_branch_target, after_delay_ip);
+ }
+
+ if (max_branch_target > validation_state->max_ip - 3) {
+ DRM_DEBUG("Branch landed after QPU_SIG_PROG_END");
+ return false;
+ }
+
+ return true;
+}
+
+/* Resets any known state for the shader, used when we may be branched to from
+ * multiple locations in the program (or at shader start).
+ */
+static void
+reset_validation_state(struct vc4_shader_validation_state *validation_state)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
+
+ for (i = 0; i < LIVE_REG_COUNT; i++) {
+ validation_state->live_min_clamp_offsets[i] = ~0;
+ validation_state->live_max_clamp_regs[i] = false;
+ validation_state->live_immediates[i] = ~0;
+ }
+}
+
+static bool
+texturing_in_progress(struct vc4_shader_validation_state *validation_state)
+{
+ return (validation_state->tmu_write_count[0] != 0 ||
+ validation_state->tmu_write_count[1] != 0);
+}
+
+static bool
+vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
+{
+ uint32_t ip = validation_state->ip;
+
+ if (!test_bit(ip, validation_state->branch_targets))
+ return true;
+
+ if (texturing_in_progress(validation_state)) {
+ DRM_DEBUG("Branch target landed during TMU setup\n");
+ return false;
+ }
+
+ /* Reset our live values tracking, since this instruction may have
+ * multiple predecessors.
+ *
+ * One could potentially do analysis to determine that, for
+ * example, all predecessors have a live max clamp in the same
+ * register, but we don't bother with that.
+ */
+ reset_validation_state(validation_state);
+
+ /* Since we've entered a basic block from potentially multiple
+ * predecessors, we need the uniforms address to be updated before any
+ * uniforms are read. We require that after any branch point, the next
+ * uniform to be loaded is a uniform address offset. That uniform's
+ * offset will be marked by the uniform address register write
+ * validation, or by the one-off check at the end of the program.
+ */
+ validation_state->needs_uniform_address_update = true;
+
+ return true;
+}
+
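+/* Entry point, run once at shader BO creation time (see the DOC comment at
+ * the top of this file). Walks every instruction, dispatching on the signal
+ * bits, and returns the metadata (uniform stream size, texture samples,
+ * uniform address offsets) that the uniform relocation code needs, or NULL
+ * if the shader is rejected.
+ */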
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
+{
+ bool found_shader_end = false;
+ int shader_end_ip = 0;
+ uint32_t last_thread_switch_ip = -3;
+ uint32_t ip;
+ struct vc4_validated_shader_info *validated_shader = NULL;
+ struct vc4_shader_validation_state validation_state;
+
+ memset(&validation_state, 0, sizeof(validation_state));
+ validation_state.shader = shader_obj->vaddr;
+ validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
+
+ reset_validation_state(&validation_state);
+
+ validation_state.branch_targets =
+ kcalloc(BITS_TO_LONGS(validation_state.max_ip),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!validation_state.branch_targets)
+ goto fail;
+
+ validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
+ if (!validated_shader)
+ goto fail;
+
+ if (!vc4_validate_branches(&validation_state))
+ goto fail;
+
+ for (ip = 0; ip < validation_state.max_ip; ip++) {
+ uint64_t inst = validation_state.shader[ip];
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+ validation_state.ip = ip;
+
+ if (!vc4_handle_branch_target(&validation_state))
+ goto fail;
+
+ if (ip == last_thread_switch_ip + 3) {
+ /* Reset r0-r3 live clamp data */
+ int i;
+
+ for (i = 64; i < LIVE_REG_COUNT; i++) {
+ validation_state.live_min_clamp_offsets[i] = ~0;
+ validation_state.live_max_clamp_regs[i] = false;
+ validation_state.live_immediates[i] = ~0;
+ }
+ }
+
+ switch (sig) {
+ case QPU_SIG_NONE:
+ case QPU_SIG_WAIT_FOR_SCOREBOARD:
+ case QPU_SIG_SCOREBOARD_UNLOCK:
+ case QPU_SIG_COLOR_LOAD:
+ case QPU_SIG_LOAD_TMU0:
+ case QPU_SIG_LOAD_TMU1:
+ case QPU_SIG_PROG_END:
+ case QPU_SIG_SMALL_IMM:
+ case QPU_SIG_THREAD_SWITCH:
+ case QPU_SIG_LAST_THREAD_SWITCH:
+ if (!check_instruction_writes(validated_shader,
+ &validation_state)) {
+ DRM_DEBUG("Bad write at ip %d\n", ip);
+ goto fail;
+ }
+
+ if (!check_instruction_reads(validated_shader,
+ &validation_state))
+ goto fail;
+
+ if (sig == QPU_SIG_PROG_END) {
+ found_shader_end = true;
+ shader_end_ip = ip;
+ }
+
+ if (sig == QPU_SIG_THREAD_SWITCH ||
+ sig == QPU_SIG_LAST_THREAD_SWITCH) {
+ validated_shader->is_threaded = true;
+
+ if (ip < last_thread_switch_ip + 3) {
+ DRM_DEBUG("Thread switch too soon after "
+ "last switch at ip %d\n", ip);
+ goto fail;
+ }
+ last_thread_switch_ip = ip;
+ }
+
+ break;
+
+ case QPU_SIG_LOAD_IMM:
+ if (!check_instruction_writes(validated_shader,
+ &validation_state)) {
+ DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip);
+ goto fail;
+ }
+ break;
+
+ case QPU_SIG_BRANCH:
+ if (!check_branch(inst, validated_shader,
+ &validation_state, ip))
+ goto fail;
+
+ if (ip < last_thread_switch_ip + 3) {
+ DRM_DEBUG("Branch in thread switch at ip %d",
+ ip);
+ goto fail;
+ }
+
+ break;
+ default:
+ DRM_DEBUG("Unsupported QPU signal %d at "
+ "instruction %d\n", sig, ip);
+ goto fail;
+ }
+
+ /* There are two delay slots after program end is signaled
+ * that are still executed, then we're finished.
+ */
+ if (found_shader_end && ip == shader_end_ip + 2)
+ break;
+ }
+
+ if (ip == validation_state.max_ip) {
+ DRM_DEBUG("shader failed to terminate before "
+ "shader BO end at %zd\n",
+ shader_obj->base.size);
+ goto fail;
+ }
+
+ /* Might corrupt other thread */
+ if (validated_shader->is_threaded &&
+ validation_state.all_registers_used) {
+ DRM_DEBUG("Shader uses threading, but uses the upper "
+ "half of the registers, too\n");
+ goto fail;
+ }
+
+ /* If we did a backwards branch and we haven't emitted a uniforms
+ * reset since then, we still need the uniforms stream to have the
+ * uniforms address available so that the backwards branch can do its
+ * uniforms reset.
+ *
+ * We could potentially prove that the backwards branch doesn't
+ * contain any uses of uniforms until program exit, but that doesn't
+ * seem to be worth the trouble.
+ */
+ if (validation_state.needs_uniform_address_for_loop) {
+ if (!require_uniform_address_uniform(validated_shader))
+ goto fail;
+ validated_shader->uniforms_size += 4;
+ }
+
+ /* Again, no chance of integer overflow here because the worst case
+ * scenario is 8 bytes of uniforms plus handles per 8-byte
+ * instruction.
+ */
+ validated_shader->uniforms_src_size =
+ (validated_shader->uniforms_size +
+ 4 * validated_shader->num_texture_samples);
+
+ kfree(validation_state.branch_targets);
+
+ return validated_shader;
+
+fail:
+ kfree(validation_state.branch_targets);
+ if (validated_shader) {
+ kfree(validated_shader->uniform_addr_offsets);
+ kfree(validated_shader->texture_samples);
+ kfree(validated_shader);
+ }
+ return NULL;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c
new file mode 100644
index 000000000..8e7facb65
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_vec.c
@@ -0,0 +1,661 @@
+/*
+ * Copyright (C) 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * DOC: VC4 SDTV module
+ *
+ * The VEC encoder generates PAL or NTSC composite video output.
+ *
+ * TV mode selection is done by an atomic property on the connector,
+ * because a drm_mode_modeinfo is insufficient to distinguish between
+ * PAL and PAL-M or NTSC and NTSC-J.
+ */
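+
+/* The TV mode property itself is created in vc4_vec_bind() from
+ * tv_mode_names[], attached with an NTSC default in vc4_vec_connector_init(),
+ * latched in vc4_vec_encoder_atomic_mode_set(), and programmed into the
+ * hardware by the per-mode mode_set() hooks in vc4_vec_tv_modes[] when the
+ * encoder is enabled.
+ */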
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_panel.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/of_graph.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+/* WSE Registers */
+#define VEC_WSE_RESET 0xc0
+
+#define VEC_WSE_CONTROL 0xc4
+#define VEC_WSE_WSS_ENABLE BIT(7)
+
+#define VEC_WSE_WSS_DATA 0xc8
+#define VEC_WSE_VPS_DATA1 0xcc
+#define VEC_WSE_VPS_CONTROL 0xd0
+
+/* VEC Registers */
+#define VEC_REVID 0x100
+
+#define VEC_CONFIG0 0x104
+#define VEC_CONFIG0_YDEL_MASK GENMASK(28, 26)
+#define VEC_CONFIG0_YDEL(x) ((x) << 26)
+#define VEC_CONFIG0_CDEL_MASK GENMASK(25, 24)
+#define VEC_CONFIG0_CDEL(x) ((x) << 24)
+#define VEC_CONFIG0_PBPR_FIL BIT(18)
+#define VEC_CONFIG0_CHROMA_GAIN_MASK GENMASK(17, 16)
+#define VEC_CONFIG0_CHROMA_GAIN_UNITY (0 << 16)
+#define VEC_CONFIG0_CHROMA_GAIN_1_32 (1 << 16)
+#define VEC_CONFIG0_CHROMA_GAIN_1_16 (2 << 16)
+#define VEC_CONFIG0_CHROMA_GAIN_1_8 (3 << 16)
+#define VEC_CONFIG0_CBURST_GAIN_MASK GENMASK(14, 13)
+#define VEC_CONFIG0_CBURST_GAIN_UNITY (0 << 13)
+#define VEC_CONFIG0_CBURST_GAIN_1_128 (1 << 13)
+#define VEC_CONFIG0_CBURST_GAIN_1_64 (2 << 13)
+#define VEC_CONFIG0_CBURST_GAIN_1_32 (3 << 13)
+#define VEC_CONFIG0_CHRBW1 BIT(11)
+#define VEC_CONFIG0_CHRBW0 BIT(10)
+#define VEC_CONFIG0_SYNCDIS BIT(9)
+#define VEC_CONFIG0_BURDIS BIT(8)
+#define VEC_CONFIG0_CHRDIS BIT(7)
+#define VEC_CONFIG0_PDEN BIT(6)
+#define VEC_CONFIG0_YCDELAY BIT(4)
+#define VEC_CONFIG0_RAMPEN BIT(2)
+#define VEC_CONFIG0_YCDIS BIT(2)
+#define VEC_CONFIG0_STD_MASK GENMASK(1, 0)
+#define VEC_CONFIG0_NTSC_STD 0
+#define VEC_CONFIG0_PAL_BDGHI_STD 1
+#define VEC_CONFIG0_PAL_N_STD 3
+
+#define VEC_SCHPH 0x108
+#define VEC_SOFT_RESET 0x10c
+#define VEC_CLMP0_START 0x144
+#define VEC_CLMP0_END 0x148
+#define VEC_FREQ3_2 0x180
+#define VEC_FREQ1_0 0x184
+
+#define VEC_CONFIG1 0x188
+#define VEC_CONFIG_VEC_RESYNC_OFF BIT(18)
+#define VEC_CONFIG_RGB219 BIT(17)
+#define VEC_CONFIG_CBAR_EN BIT(16)
+#define VEC_CONFIG_TC_OBB BIT(15)
+#define VEC_CONFIG1_OUTPUT_MODE_MASK GENMASK(12, 10)
+#define VEC_CONFIG1_C_Y_CVBS (0 << 10)
+#define VEC_CONFIG1_CVBS_Y_C (1 << 10)
+#define VEC_CONFIG1_PR_Y_PB (2 << 10)
+#define VEC_CONFIG1_RGB (4 << 10)
+#define VEC_CONFIG1_Y_C_CVBS (5 << 10)
+#define VEC_CONFIG1_C_CVBS_Y (6 << 10)
+#define VEC_CONFIG1_C_CVBS_CVBS (7 << 10)
+#define VEC_CONFIG1_DIS_CHR BIT(9)
+#define VEC_CONFIG1_DIS_LUMA BIT(8)
+#define VEC_CONFIG1_YCBCR_IN BIT(6)
+#define VEC_CONFIG1_DITHER_TYPE_LFSR 0
+#define VEC_CONFIG1_DITHER_TYPE_COUNTER BIT(5)
+#define VEC_CONFIG1_DITHER_EN BIT(4)
+#define VEC_CONFIG1_CYDELAY BIT(3)
+#define VEC_CONFIG1_LUMADIS BIT(2)
+#define VEC_CONFIG1_COMPDIS BIT(1)
+#define VEC_CONFIG1_CUSTOM_FREQ BIT(0)
+
+#define VEC_CONFIG2 0x18c
+#define VEC_CONFIG2_PROG_SCAN BIT(15)
+#define VEC_CONFIG2_SYNC_ADJ_MASK GENMASK(14, 12)
+#define VEC_CONFIG2_SYNC_ADJ(x) (((x) / 2) << 12)
+#define VEC_CONFIG2_PBPR_EN BIT(10)
+#define VEC_CONFIG2_UV_DIG_DIS BIT(6)
+#define VEC_CONFIG2_RGB_DIG_DIS BIT(5)
+#define VEC_CONFIG2_TMUX_MASK GENMASK(3, 2)
+#define VEC_CONFIG2_TMUX_DRIVE0 (0 << 2)
+#define VEC_CONFIG2_TMUX_RG_COMP (1 << 2)
+#define VEC_CONFIG2_TMUX_UV_YC (2 << 2)
+#define VEC_CONFIG2_TMUX_SYNC_YC (3 << 2)
+
+#define VEC_INTERRUPT_CONTROL 0x190
+#define VEC_INTERRUPT_STATUS 0x194
+#define VEC_FCW_SECAM_B 0x198
+#define VEC_SECAM_GAIN_VAL 0x19c
+
+#define VEC_CONFIG3 0x1a0
+#define VEC_CONFIG3_HORIZ_LEN_STD (0 << 0)
+#define VEC_CONFIG3_HORIZ_LEN_MPEG1_SIF (1 << 0)
+#define VEC_CONFIG3_SHAPE_NON_LINEAR BIT(1)
+
+#define VEC_STATUS0 0x200
+#define VEC_MASK0 0x204
+
+#define VEC_CFG 0x208
+#define VEC_CFG_SG_MODE_MASK GENMASK(6, 5)
+#define VEC_CFG_SG_MODE(x) ((x) << 5)
+#define VEC_CFG_SG_EN BIT(4)
+#define VEC_CFG_VEC_EN BIT(3)
+#define VEC_CFG_MB_EN BIT(2)
+#define VEC_CFG_ENABLE BIT(1)
+#define VEC_CFG_TB_EN BIT(0)
+
+#define VEC_DAC_TEST 0x20c
+
+#define VEC_DAC_CONFIG 0x210
+#define VEC_DAC_CONFIG_LDO_BIAS_CTRL(x) ((x) << 24)
+#define VEC_DAC_CONFIG_DRIVER_CTRL(x) ((x) << 16)
+#define VEC_DAC_CONFIG_DAC_CTRL(x) (x)
+
+#define VEC_DAC_MISC 0x214
+#define VEC_DAC_MISC_VCD_CTRL_MASK GENMASK(31, 16)
+#define VEC_DAC_MISC_VCD_CTRL(x) ((x) << 16)
+#define VEC_DAC_MISC_VID_ACT BIT(8)
+#define VEC_DAC_MISC_VCD_PWRDN BIT(6)
+#define VEC_DAC_MISC_BIAS_PWRDN BIT(5)
+#define VEC_DAC_MISC_DAC_PWRDN BIT(2)
+#define VEC_DAC_MISC_LDO_PWRDN BIT(1)
+#define VEC_DAC_MISC_DAC_RST_N BIT(0)
+
+/* General VEC hardware state. */
+struct vc4_vec {
+ struct platform_device *pdev;
+
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+
+ void __iomem *regs;
+
+ struct clk *clock;
+
+ const struct vc4_vec_tv_mode *tv_mode;
+};
+
+#define VEC_READ(offset) readl(vec->regs + (offset))
+#define VEC_WRITE(offset, val) writel(val, vec->regs + (offset))
+
+/* VC4 VEC encoder KMS struct */
+struct vc4_vec_encoder {
+ struct vc4_encoder base;
+ struct vc4_vec *vec;
+};
+
+static inline struct vc4_vec_encoder *
+to_vc4_vec_encoder(struct drm_encoder *encoder)
+{
+ return container_of(encoder, struct vc4_vec_encoder, base.base);
+}
+
+/* VC4 VEC connector KMS struct */
+struct vc4_vec_connector {
+ struct drm_connector base;
+ struct vc4_vec *vec;
+
+ /* Since the connector is attached to just the one encoder,
+ * this is the reference to it so we can do the best_encoder()
+ * hook.
+ */
+ struct drm_encoder *encoder;
+};
+
+static inline struct vc4_vec_connector *
+to_vc4_vec_connector(struct drm_connector *connector)
+{
+ return container_of(connector, struct vc4_vec_connector, base);
+}
+
+enum vc4_vec_tv_mode_id {
+ VC4_VEC_TV_MODE_NTSC,
+ VC4_VEC_TV_MODE_NTSC_J,
+ VC4_VEC_TV_MODE_PAL,
+ VC4_VEC_TV_MODE_PAL_M,
+};
+
+struct vc4_vec_tv_mode {
+ const struct drm_display_mode *mode;
+ void (*mode_set)(struct vc4_vec *vec);
+};
+
+#define VEC_REG(reg) { reg, #reg }
+static const struct {
+ u32 reg;
+ const char *name;
+} vec_regs[] = {
+ VEC_REG(VEC_WSE_CONTROL),
+ VEC_REG(VEC_WSE_WSS_DATA),
+ VEC_REG(VEC_WSE_VPS_DATA1),
+ VEC_REG(VEC_WSE_VPS_CONTROL),
+ VEC_REG(VEC_REVID),
+ VEC_REG(VEC_CONFIG0),
+ VEC_REG(VEC_SCHPH),
+ VEC_REG(VEC_CLMP0_START),
+ VEC_REG(VEC_CLMP0_END),
+ VEC_REG(VEC_FREQ3_2),
+ VEC_REG(VEC_FREQ1_0),
+ VEC_REG(VEC_CONFIG1),
+ VEC_REG(VEC_CONFIG2),
+ VEC_REG(VEC_INTERRUPT_CONTROL),
+ VEC_REG(VEC_INTERRUPT_STATUS),
+ VEC_REG(VEC_FCW_SECAM_B),
+ VEC_REG(VEC_SECAM_GAIN_VAL),
+ VEC_REG(VEC_CONFIG3),
+ VEC_REG(VEC_STATUS0),
+ VEC_REG(VEC_MASK0),
+ VEC_REG(VEC_CFG),
+ VEC_REG(VEC_DAC_TEST),
+ VEC_REG(VEC_DAC_CONFIG),
+ VEC_REG(VEC_DAC_MISC),
+};
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_vec_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_vec *vec = vc4->vec;
+ int i;
+
+ if (!vec)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(vec_regs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ vec_regs[i].name, vec_regs[i].reg,
+ VEC_READ(vec_regs[i].reg));
+ }
+
+ return 0;
+}
+#endif
+
+static void vc4_vec_ntsc_mode_set(struct vc4_vec *vec)
+{
+ VEC_WRITE(VEC_CONFIG0, VEC_CONFIG0_NTSC_STD | VEC_CONFIG0_PDEN);
+ VEC_WRITE(VEC_CONFIG1, VEC_CONFIG1_C_CVBS_CVBS);
+}
+
+static void vc4_vec_ntsc_j_mode_set(struct vc4_vec *vec)
+{
+ VEC_WRITE(VEC_CONFIG0, VEC_CONFIG0_NTSC_STD);
+ VEC_WRITE(VEC_CONFIG1, VEC_CONFIG1_C_CVBS_CVBS);
+}
+
+static const struct drm_display_mode ntsc_mode = {
+ DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 13500,
+ 720, 720 + 14, 720 + 14 + 64, 720 + 14 + 64 + 60, 0,
+ 480, 480 + 3, 480 + 3 + 3, 480 + 3 + 3 + 16, 0,
+ DRM_MODE_FLAG_INTERLACE)
+};
+
+static void vc4_vec_pal_mode_set(struct vc4_vec *vec)
+{
+ VEC_WRITE(VEC_CONFIG0, VEC_CONFIG0_PAL_BDGHI_STD);
+ VEC_WRITE(VEC_CONFIG1, VEC_CONFIG1_C_CVBS_CVBS);
+}
+
+static void vc4_vec_pal_m_mode_set(struct vc4_vec *vec)
+{
+ VEC_WRITE(VEC_CONFIG0, VEC_CONFIG0_PAL_BDGHI_STD);
+ VEC_WRITE(VEC_CONFIG1,
+ VEC_CONFIG1_C_CVBS_CVBS | VEC_CONFIG1_CUSTOM_FREQ);
+ VEC_WRITE(VEC_FREQ3_2, 0x223b);
+ VEC_WRITE(VEC_FREQ1_0, 0x61d1);
+}
+
+static const struct drm_display_mode pal_mode = {
+ DRM_MODE("720x576", DRM_MODE_TYPE_DRIVER, 13500,
+ 720, 720 + 20, 720 + 20 + 64, 720 + 20 + 64 + 60, 0,
+ 576, 576 + 2, 576 + 2 + 3, 576 + 2 + 3 + 20, 0,
+ DRM_MODE_FLAG_INTERLACE)
+};
+
+static const struct vc4_vec_tv_mode vc4_vec_tv_modes[] = {
+ [VC4_VEC_TV_MODE_NTSC] = {
+ .mode = &ntsc_mode,
+ .mode_set = vc4_vec_ntsc_mode_set,
+ },
+ [VC4_VEC_TV_MODE_NTSC_J] = {
+ .mode = &ntsc_mode,
+ .mode_set = vc4_vec_ntsc_j_mode_set,
+ },
+ [VC4_VEC_TV_MODE_PAL] = {
+ .mode = &pal_mode,
+ .mode_set = vc4_vec_pal_mode_set,
+ },
+ [VC4_VEC_TV_MODE_PAL_M] = {
+ .mode = &pal_mode,
+ .mode_set = vc4_vec_pal_m_mode_set,
+ },
+};
+
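+/* The driver does not implement any form of load detection, so the connector
+ * status is always reported as unknown.
+ */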
+static enum drm_connector_status
+vc4_vec_connector_detect(struct drm_connector *connector, bool force)
+{
+ return connector_status_unknown;
+}
+
+static void vc4_vec_connector_destroy(struct drm_connector *connector)
+{
+ drm_connector_unregister(connector);
+ drm_connector_cleanup(connector);
+}
+
+static int vc4_vec_connector_get_modes(struct drm_connector *connector)
+{
+ struct drm_connector_state *state = connector->state;
+ struct drm_display_mode *mode;
+
+ mode = drm_mode_duplicate(connector->dev,
+ vc4_vec_tv_modes[state->tv.mode].mode);
+ if (!mode) {
+ DRM_ERROR("Failed to create a new display mode\n");
+ return -ENOMEM;
+ }
+
+ drm_mode_probed_add(connector, mode);
+
+ return 1;
+}
+
+static const struct drm_connector_funcs vc4_vec_connector_funcs = {
+ .detect = vc4_vec_connector_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = vc4_vec_connector_destroy,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_connector_helper_funcs vc4_vec_connector_helper_funcs = {
+ .get_modes = vc4_vec_connector_get_modes,
+};
+
+static struct drm_connector *vc4_vec_connector_init(struct drm_device *dev,
+ struct vc4_vec *vec)
+{
+ struct drm_connector *connector = NULL;
+ struct vc4_vec_connector *vec_connector;
+
+ vec_connector = devm_kzalloc(dev->dev, sizeof(*vec_connector),
+ GFP_KERNEL);
+ if (!vec_connector)
+ return ERR_PTR(-ENOMEM);
+
+ connector = &vec_connector->base;
+ connector->interlace_allowed = true;
+
+ vec_connector->encoder = vec->encoder;
+ vec_connector->vec = vec;
+
+ drm_connector_init(dev, connector, &vc4_vec_connector_funcs,
+ DRM_MODE_CONNECTOR_Composite);
+ drm_connector_helper_add(connector, &vc4_vec_connector_helper_funcs);
+
+ drm_object_attach_property(&connector->base,
+ dev->mode_config.tv_mode_property,
+ VC4_VEC_TV_MODE_NTSC);
+ vec->tv_mode = &vc4_vec_tv_modes[VC4_VEC_TV_MODE_NTSC];
+
+ drm_connector_attach_encoder(connector, vec->encoder);
+
+ return connector;
+}
+
+static const struct drm_encoder_funcs vc4_vec_encoder_funcs = {
+ .destroy = drm_encoder_cleanup,
+};
+
+static void vc4_vec_encoder_disable(struct drm_encoder *encoder)
+{
+ struct vc4_vec_encoder *vc4_vec_encoder = to_vc4_vec_encoder(encoder);
+ struct vc4_vec *vec = vc4_vec_encoder->vec;
+ int ret;
+
+ VEC_WRITE(VEC_CFG, 0);
+ VEC_WRITE(VEC_DAC_MISC,
+ VEC_DAC_MISC_VCD_PWRDN |
+ VEC_DAC_MISC_BIAS_PWRDN |
+ VEC_DAC_MISC_DAC_PWRDN |
+ VEC_DAC_MISC_LDO_PWRDN);
+
+ clk_disable_unprepare(vec->clock);
+
+ ret = pm_runtime_put(&vec->pdev->dev);
+ if (ret < 0) {
+ DRM_ERROR("Failed to release power domain: %d\n", ret);
+ return;
+ }
+}
+
+static void vc4_vec_encoder_enable(struct drm_encoder *encoder)
+{
+ struct vc4_vec_encoder *vc4_vec_encoder = to_vc4_vec_encoder(encoder);
+ struct vc4_vec *vec = vc4_vec_encoder->vec;
+ int ret;
+
+ ret = pm_runtime_get_sync(&vec->pdev->dev);
+ if (ret < 0) {
+ DRM_ERROR("Failed to retain power domain: %d\n", ret);
+ return;
+ }
+
+ /*
+ * We need to set the clock rate each time we enable the encoder
+ * because there's a chance we share the same parent with the HDMI
+ * clock, and both drivers are requesting different rates.
+ * The good news is, these 2 encoders cannot be enabled at the same
+ * time, thus preventing incompatible rate requests.
+ */
+ ret = clk_set_rate(vec->clock, 108000000);
+ if (ret) {
+ DRM_ERROR("Failed to set clock rate: %d\n", ret);
+ return;
+ }
+
+ ret = clk_prepare_enable(vec->clock);
+ if (ret) {
+ DRM_ERROR("Failed to turn on core clock: %d\n", ret);
+ return;
+ }
+
+ /* Reset the different blocks */
+ VEC_WRITE(VEC_WSE_RESET, 1);
+ VEC_WRITE(VEC_SOFT_RESET, 1);
+
+ /* Disable the CGSM-A and WSE blocks */
+ VEC_WRITE(VEC_WSE_CONTROL, 0);
+
+ /* Write config common to all modes. */
+
+ /*
+ * Color subcarrier phase: phase = 360 * SCHPH / 256.
+ * 0x28 <=> 39.375 deg.
+ */
+ VEC_WRITE(VEC_SCHPH, 0x28);
+
+ /*
+ * Reset to default values.
+ */
+ VEC_WRITE(VEC_CLMP0_START, 0xac);
+ VEC_WRITE(VEC_CLMP0_END, 0xec);
+ VEC_WRITE(VEC_CONFIG2,
+ VEC_CONFIG2_UV_DIG_DIS | VEC_CONFIG2_RGB_DIG_DIS);
+ VEC_WRITE(VEC_CONFIG3, VEC_CONFIG3_HORIZ_LEN_STD);
+ VEC_WRITE(VEC_DAC_CONFIG,
+ VEC_DAC_CONFIG_DAC_CTRL(0xc) |
+ VEC_DAC_CONFIG_DRIVER_CTRL(0xc) |
+ VEC_DAC_CONFIG_LDO_BIAS_CTRL(0x46));
+
+ /* Mask all interrupts. */
+ VEC_WRITE(VEC_MASK0, 0);
+
+ vec->tv_mode->mode_set(vec);
+
+ VEC_WRITE(VEC_DAC_MISC,
+ VEC_DAC_MISC_VID_ACT | VEC_DAC_MISC_DAC_RST_N);
+ VEC_WRITE(VEC_CFG, VEC_CFG_VEC_EN);
+}
+
+static bool vc4_vec_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ return true;
+}
+
+static void vc4_vec_encoder_atomic_mode_set(struct drm_encoder *encoder,
+ struct drm_crtc_state *crtc_state,
+ struct drm_connector_state *conn_state)
+{
+ struct vc4_vec_encoder *vc4_vec_encoder = to_vc4_vec_encoder(encoder);
+ struct vc4_vec *vec = vc4_vec_encoder->vec;
+
+ vec->tv_mode = &vc4_vec_tv_modes[conn_state->tv.mode];
+}
+
+static int vc4_vec_encoder_atomic_check(struct drm_encoder *encoder,
+ struct drm_crtc_state *crtc_state,
+ struct drm_connector_state *conn_state)
+{
+ const struct vc4_vec_tv_mode *vec_mode;
+
+ vec_mode = &vc4_vec_tv_modes[conn_state->tv.mode];
+
+ if (conn_state->crtc &&
+ !drm_mode_equal(vec_mode->mode, &crtc_state->adjusted_mode))
+ return -EINVAL;
+
+ return 0;
+}
+
+static const struct drm_encoder_helper_funcs vc4_vec_encoder_helper_funcs = {
+ .disable = vc4_vec_encoder_disable,
+ .enable = vc4_vec_encoder_enable,
+ .mode_fixup = vc4_vec_encoder_mode_fixup,
+ .atomic_check = vc4_vec_encoder_atomic_check,
+ .atomic_mode_set = vc4_vec_encoder_atomic_mode_set,
+};
+
+static const struct of_device_id vc4_vec_dt_match[] = {
+ { .compatible = "brcm,bcm2835-vec", .data = NULL },
+ { /* sentinel */ },
+};
+
+static const char * const tv_mode_names[] = {
+ [VC4_VEC_TV_MODE_NTSC] = "NTSC",
+ [VC4_VEC_TV_MODE_NTSC_J] = "NTSC-J",
+ [VC4_VEC_TV_MODE_PAL] = "PAL",
+ [VC4_VEC_TV_MODE_PAL_M] = "PAL-M",
+};
+
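+/* Component bind callback: creates the TV mode properties, maps the VEC
+ * registers, and registers the encoder and connector with the DRM device.
+ */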
+static int vc4_vec_bind(struct device *dev, struct device *master, void *data)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_vec *vec;
+ struct vc4_vec_encoder *vc4_vec_encoder;
+ int ret;
+
+ ret = drm_mode_create_tv_properties(drm, ARRAY_SIZE(tv_mode_names),
+ tv_mode_names);
+ if (ret)
+ return ret;
+
+ vec = devm_kzalloc(dev, sizeof(*vec), GFP_KERNEL);
+ if (!vec)
+ return -ENOMEM;
+
+ vc4_vec_encoder = devm_kzalloc(dev, sizeof(*vc4_vec_encoder),
+ GFP_KERNEL);
+ if (!vc4_vec_encoder)
+ return -ENOMEM;
+ vc4_vec_encoder->base.type = VC4_ENCODER_TYPE_VEC;
+ vc4_vec_encoder->vec = vec;
+ vec->encoder = &vc4_vec_encoder->base.base;
+
+ vec->pdev = pdev;
+ vec->regs = vc4_ioremap_regs(pdev, 0);
+ if (IS_ERR(vec->regs))
+ return PTR_ERR(vec->regs);
+
+ vec->clock = devm_clk_get(dev, NULL);
+ if (IS_ERR(vec->clock)) {
+ ret = PTR_ERR(vec->clock);
+ if (ret != -EPROBE_DEFER)
+ DRM_ERROR("Failed to get clock: %d\n", ret);
+ return ret;
+ }
+
+ pm_runtime_enable(dev);
+
+ drm_encoder_init(drm, vec->encoder, &vc4_vec_encoder_funcs,
+ DRM_MODE_ENCODER_TVDAC, NULL);
+ drm_encoder_helper_add(vec->encoder, &vc4_vec_encoder_helper_funcs);
+
+ vec->connector = vc4_vec_connector_init(drm, vec);
+ if (IS_ERR(vec->connector)) {
+ ret = PTR_ERR(vec->connector);
+ goto err_destroy_encoder;
+ }
+
+ dev_set_drvdata(dev, vec);
+
+ vc4->vec = vec;
+
+ return 0;
+
+err_destroy_encoder:
+ drm_encoder_cleanup(vec->encoder);
+ pm_runtime_disable(dev);
+
+ return ret;
+}
+
+static void vc4_vec_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+ struct vc4_vec *vec = dev_get_drvdata(dev);
+
+ vc4_vec_connector_destroy(vec->connector);
+ drm_encoder_cleanup(vec->encoder);
+ pm_runtime_disable(dev);
+
+ vc4->vec = NULL;
+}
+
+static const struct component_ops vc4_vec_ops = {
+ .bind = vc4_vec_bind,
+ .unbind = vc4_vec_unbind,
+};
+
+static int vc4_vec_dev_probe(struct platform_device *pdev)
+{
+ return component_add(&pdev->dev, &vc4_vec_ops);
+}
+
+static int vc4_vec_dev_remove(struct platform_device *pdev)
+{
+ component_del(&pdev->dev, &vc4_vec_ops);
+ return 0;
+}
+
+struct platform_driver vc4_vec_driver = {
+ .probe = vc4_vec_dev_probe,
+ .remove = vc4_vec_dev_remove,
+ .driver = {
+ .name = "vc4_vec",
+ .of_match_table = vc4_vec_dt_match,
+ },
+};