Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/gpu/drm/panfrost
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
24 files changed, 5899 insertions, 0 deletions
diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
new file mode 100644
index 0000000000..e6403a9d66
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config DRM_PANFROST
+	tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)"
+	depends on DRM
+	depends on ARM || ARM64 || COMPILE_TEST
+	depends on !GENERIC_ATOMIC64    # for IOMMU_IO_PGTABLE_LPAE
+	depends on MMU
+	select DRM_SCHED
+	select IOMMU_SUPPORT
+	select IOMMU_IO_PGTABLE_LPAE
+	select DRM_GEM_SHMEM_HELPER
+	select PM_DEVFREQ
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select WANT_DEV_COREDUMP
+	help
+	  DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and
+	  Bifrost (G3x, G5x, G7x) GPUs.
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
new file mode 100644
index 0000000000..7da2b3f02e
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+panfrost-y := \
+	panfrost_drv.o \
+	panfrost_device.o \
+	panfrost_devfreq.o \
+	panfrost_gem.o \
+	panfrost_gem_shrinker.o \
+	panfrost_gpu.o \
+	panfrost_job.o \
+	panfrost_mmu.o \
+	panfrost_perfcnt.o \
+	panfrost_dump.o
+
+obj-$(CONFIG_DRM_PANFROST) += panfrost.o
diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO
new file mode 100644
index 0000000000..8c811a9e68
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/TODO
@@ -0,0 +1,14 @@
+- Thermal support.
+
+- Bifrost support:
+  - DT bindings (Neil, WIP)
+  - MMU page table format and address space setup
+  - Bifrost specific feature and issue handling
+  - Coherent DMA support
+
+- Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu)
+
+- Compute job support. So-called 'compute only' jobs need to be plumbed up to
+  userspace.
+
+- Support core dump on job failure
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
new file mode 100644
index 0000000000..e78de99e99
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Collabora ltd. */
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+
+#include "panfrost_device.h"
+#include "panfrost_devfreq.h"
+
+/* Charge the time since the last update to busy_time or idle_time. */
+static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfreq)
+{
+	ktime_t stamp = ktime_get();
+	ktime_t elapsed = ktime_sub(stamp, pfdevfreq->time_last_update);
+
+	/* busy_count > 0: more record_busy than record_idle calls so far. */
+	if (pfdevfreq->busy_count <= 0)
+		pfdevfreq->idle_time += elapsed;
+	else
+		pfdevfreq->busy_time += elapsed;
+
+	pfdevfreq->time_last_update = stamp;
+}
+
+/* devfreq .target hook: look up the recommended OPP, then set rate *freq. */
+static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
+				   u32 flags)
+{
+	struct dev_pm_opp *match = devfreq_recommended_opp(dev, freq, flags);
+
+	if (IS_ERR(match))
+		return PTR_ERR(match);
+
+	dev_pm_opp_put(match);	/* the OPP itself is not used past the lookup */
+	return dev_pm_opp_set_rate(dev, *freq);
+}
+
+static void panfrost_devfreq_reset(struct panfrost_devfreq *pfdevfreq)
+{
+	pfdevfreq->busy_time = 0;	/* start a new accounting window */
+	pfdevfreq->idle_time = 0;
+	pfdevfreq->time_last_update = ktime_get();	/* window start */
+}
+
+/* devfreq .get_dev_status hook: report and restart the busy/total window. */
+static int panfrost_devfreq_get_dev_status(struct device *dev,
+					   struct devfreq_dev_status *status)
+{
+	struct panfrost_device *pfdev = dev_get_drvdata(dev);
+	struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq;
+	unsigned long irqflags;
+
+	status->current_frequency = clk_get_rate(pfdev->clock);
+
+	spin_lock_irqsave(&pfdevfreq->lock, irqflags);
+
+	/* Bring the counters up to "now" before sampling them. */
+	panfrost_devfreq_update_utilization(pfdevfreq);
+	status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time,
+						   pfdevfreq->idle_time));
+	status->busy_time = ktime_to_ns(pfdevfreq->busy_time);
+	panfrost_devfreq_reset(pfdevfreq);
+
+	spin_unlock_irqrestore(&pfdevfreq->lock, irqflags);
+
+	/* total_time may be under 100 ns; keep the divisor from becoming 0. */
+	dev_dbg(pfdev->dev, "busy %lu total %lu %lu %% freq %lu MHz\n",
+		status->busy_time, status->total_time,
+		status->busy_time / (status->total_time / 100 ?: 1),
+		status->current_frequency / 1000 / 1000);
+
+	return 0;
+}
+
+static struct devfreq_dev_profile panfrost_devfreq_profile = {
+	.timer = DEVFREQ_TIMER_DELAYED,
+	.polling_ms = 50, /* ~3 frames */
+	.target = panfrost_devfreq_target, /* sets the rate via the OPP calls */
+	.get_dev_status = panfrost_devfreq_get_dev_status, /* busy/total ns */
+}; /* .initial_freq is filled in by panfrost_devfreq_init() */
+
+/* Read the optional "speed-bin" nvmem cell and register it as supported hw. */
+static int panfrost_read_speedbin(struct device *dev)
+{
+	u32 bin;
+	int err = nvmem_cell_read_variable_le_u32(dev, "speed-bin", &bin);
+
+	/*
+	 * -ENOENT and -EOPNOTSUPP are treated as "no speed-bin declared for
+	 * this platform": carry on without one and report success.
+	 */
+	if (err == -ENOENT || err == -EOPNOTSUPP)
+		return 0;
+
+	/* Any other error: a bin value was expected but could not be read. */
+	if (err) {
+		DRM_DEV_ERROR(dev, "Cannot read speed-bin (%d).", err);
+		return err;
+	}
+
+	DRM_DEV_DEBUG(dev, "Using speed-bin = 0x%x\n", bin);
+
+	/* A single u32 is passed as the supported-hw list (count = 1). */
+	return devm_pm_opp_set_supported_hw(dev, &bin, 1);
+}
+
+/* Set up OPP-based devfreq and cooling; returns 0 if devfreq is skipped. */
+int panfrost_devfreq_init(struct panfrost_device *pfdev)
+{
+	int ret;
+	struct dev_pm_opp *opp;
+	unsigned long cur_freq;
+	struct device *dev = &pfdev->pdev->dev;
+	struct devfreq *devfreq;
+	struct thermal_cooling_device *cooling;
+	struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq;
+
+	if (pfdev->comp->num_supplies > 1) {
+		/*
+		 * GPUs with more than 1 supply require platform-specific handling:
+		 * continue without devfreq
+		 */
+		DRM_DEV_INFO(dev, "More than 1 supply is not supported yet\n");
+		return 0;
+	}
+
+	ret = panfrost_read_speedbin(dev);
+	if (ret)
+		return ret;
+
+	ret = devm_pm_opp_set_regulators(dev, pfdev->comp->supply_names);
+	if (ret) {
+		/* Continue if the optional regulator is missing */
+		if (ret != -ENODEV) {
+			if (ret != -EPROBE_DEFER)
+				DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n");
+			return ret;
+		}
+	}
+
+	ret = devm_pm_opp_of_add_table(dev);
+	if (ret) {
+		/* Optional, continue without devfreq */
+		if (ret == -ENODEV)
+			ret = 0;
+		return ret;
+	}
+	pfdevfreq->opp_of_table_added = true;
+
+	spin_lock_init(&pfdevfreq->lock);
+	panfrost_devfreq_reset(pfdevfreq);
+
+	cur_freq = clk_get_rate(pfdev->clock);
+
+	opp = devfreq_recommended_opp(dev, &cur_freq, 0);
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	panfrost_devfreq_profile.initial_freq = cur_freq;
+
+	/*
+	 * Set the recommended OPP: this enables and configures the regulator,
+	 * if any, and avoids a switch off by regulator_late_cleanup().
+	 */
+	ret = dev_pm_opp_set_opp(dev, opp);
+	/* Drop the lookup reference on failure too, so it is never leaked. */
+	dev_pm_opp_put(opp);
+	if (ret) {
+		DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n");
+		return ret;
+	}
+
+	/*
+	 * Setup default thresholds for the simple_ondemand governor.
+	 * The values are chosen based on experiments.
+	 */
+	pfdevfreq->gov_data.upthreshold = 45;
+	pfdevfreq->gov_data.downdifferential = 5;
+
+	devfreq = devm_devfreq_add_device(dev, &panfrost_devfreq_profile,
+					  DEVFREQ_GOV_SIMPLE_ONDEMAND,
+					  &pfdevfreq->gov_data);
+	if (IS_ERR(devfreq)) {
+		DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n");
+		return PTR_ERR(devfreq);
+	}
+	pfdevfreq->devfreq = devfreq;
+
+	cooling = devfreq_cooling_em_register(devfreq, NULL);
+	if (IS_ERR(cooling))
+		DRM_DEV_INFO(dev, "Failed to register cooling device\n");
+	else
+		pfdevfreq->cooling = cooling;
+
+	return 0;
+}
+
+/* Unregister the cooling device, if one was registered; safe to repeat. */
+void panfrost_devfreq_fini(struct panfrost_device *pfdev)
+{
+	struct thermal_cooling_device **slot = &pfdev->pfdevfreq.cooling;
+
+	if (*slot)
+		devfreq_cooling_unregister(*slot);
+	*slot = NULL;
+}
+
+/* Restart utilization accounting and resume devfreq; no-op without devfreq. */
+void panfrost_devfreq_resume(struct panfrost_device *pfdev)
+{
+	struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq;
+
+	if (pfdevfreq->devfreq) {
+		/* Discard whatever accumulated before the resume. */
+		panfrost_devfreq_reset(pfdevfreq);
+		devfreq_resume_device(pfdevfreq->devfreq);
+	}
+}
+
+/* Suspend devfreq if it was set up; otherwise do nothing. */
+void panfrost_devfreq_suspend(struct panfrost_device *pfdev)
+{
+	struct devfreq *df = pfdev->pfdevfreq.devfreq;
+
+	/* The pointer is only assigned once devfreq has been registered. */
+	if (df)
+		devfreq_suspend_device(df);
+}
+
+/* Count one more busy reference; panfrost_devfreq_record_idle() drops it. */
+void panfrost_devfreq_record_busy(struct panfrost_devfreq *pfdevfreq)
+{
+	unsigned long flags;
+
+	/* Nothing to account when devfreq was never registered. */
+	if (!pfdevfreq->devfreq)
+		return;
+
+	spin_lock_irqsave(&pfdevfreq->lock, flags);
+	/* Close the current interval before the busy state changes. */
+	panfrost_devfreq_update_utilization(pfdevfreq);
+	pfdevfreq->busy_count += 1;
+	spin_unlock_irqrestore(&pfdevfreq->lock, flags);
+}
+
+/* Drop one busy reference; warns if the count would go negative. */
+void panfrost_devfreq_record_idle(struct panfrost_devfreq *pfdevfreq)
+{
+	unsigned long flags;
+
+	/* Nothing to account when devfreq was never registered. */
+	if (!pfdevfreq->devfreq)
+		return;
+
+	spin_lock_irqsave(&pfdevfreq->lock, flags);
+	panfrost_devfreq_update_utilization(pfdevfreq);
+	pfdevfreq->busy_count--;
+	WARN_ON(pfdevfreq->busy_count < 0);
+	spin_unlock_irqrestore(&pfdevfreq->lock, flags);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.h b/drivers/gpu/drm/panfrost/panfrost_devfreq.h
new file mode 100644
index 0000000000..1514c1f9d9
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANFROST_DEVFREQ_H__
+#define __PANFROST_DEVFREQ_H__
+
+#include <linux/devfreq.h>
+#include <linux/spinlock.h>
+#include <linux/ktime.h>
+
+struct devfreq;
+struct thermal_cooling_device;
+
+struct panfrost_device;
+
+struct panfrost_devfreq {
+	struct devfreq *devfreq; /* set once devfreq has been registered */
+	struct thermal_cooling_device *cooling; /* NULL if not registered */
+	struct devfreq_simple_ondemand_data gov_data; /* governor thresholds */
+	bool opp_of_table_added; /* devm_pm_opp_of_add_table() succeeded */
+
+	ktime_t busy_time; /* accumulated while busy_count > 0 */
+	ktime_t idle_time; /* accumulated while busy_count <= 0 */
+	ktime_t time_last_update; /* when the two totals were last updated */
+	int busy_count; /* record_busy() calls not yet matched by record_idle() */
+	/*
+	 * Protect busy_time, idle_time, time_last_update and busy_count
+	 * because these can be updated concurrently between multiple jobs.
+	 */
+	spinlock_t lock;
+};
+
+int panfrost_devfreq_init(struct panfrost_device *pfdev);
+void panfrost_devfreq_fini(struct panfrost_device *pfdev);
+
+void panfrost_devfreq_resume(struct panfrost_device *pfdev);
+void panfrost_devfreq_suspend(struct panfrost_device *pfdev);
+
+void panfrost_devfreq_record_busy(struct panfrost_devfreq *devfreq);
+void panfrost_devfreq_record_idle(struct panfrost_devfreq *devfreq);
+
+#endif /* __PANFROST_DEVFREQ_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
new file mode 100644
index 0000000000..fa1a086a86
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#include <linux/clk.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/regulator/consumer.h>
+
+#include "panfrost_device.h"
+#include "panfrost_devfreq.h"
+#include "panfrost_features.h"
+#include "panfrost_issues.h"
+#include "panfrost_gpu.h"
+#include "panfrost_job.h"
+#include "panfrost_mmu.h"
+#include "panfrost_perfcnt.h"
+
+/* Acquire the optional reset control array and deassert it. */
+static int panfrost_reset_init(struct panfrost_device *pfdev)
+{
+	pfdev->rstc = devm_reset_control_array_get_optional_exclusive(pfdev->dev);
+	if (!IS_ERR(pfdev->rstc))
+		return reset_control_deassert(pfdev->rstc);
+
+	dev_err(pfdev->dev, "get reset failed %ld\n", PTR_ERR(pfdev->rstc));
+	return PTR_ERR(pfdev->rstc);
+}
+
+static void panfrost_reset_fini(struct panfrost_device *pfdev)
+{
+	reset_control_assert(pfdev->rstc); /* undoes the deassert done at init */
+}
+
+/* Get and enable the GPU clock, then the optional "bus" clock. */
+static int panfrost_clk_init(struct panfrost_device *pfdev)
+{
+	struct device *dev = pfdev->dev;
+	int err;
+
+	pfdev->clock = devm_clk_get(dev, NULL);
+	if (IS_ERR(pfdev->clock)) {
+		dev_err(dev, "get clock failed %ld\n", PTR_ERR(pfdev->clock));
+		return PTR_ERR(pfdev->clock);
+	}
+
+	dev_info(dev, "clock rate = %lu\n", clk_get_rate(pfdev->clock));
+
+	err = clk_prepare_enable(pfdev->clock);
+	if (err)
+		return err;
+
+	pfdev->bus_clock = devm_clk_get_optional(dev, "bus");
+	if (IS_ERR(pfdev->bus_clock)) {
+		dev_err(dev, "get bus_clock failed %ld\n",
+			PTR_ERR(pfdev->bus_clock));
+		err = PTR_ERR(pfdev->bus_clock);
+		goto disable_clock;
+	}
+
+	/* A NULL bus_clock means there is nothing further to enable. */
+	if (!pfdev->bus_clock)
+		return 0;
+
+	dev_info(dev, "bus_clock rate = %lu\n", clk_get_rate(pfdev->bus_clock));
+
+	err = clk_prepare_enable(pfdev->bus_clock);
+	if (!err)
+		return 0;
+
+disable_clock:
+	/* Bus clock step failed: undo the main clock enable before bailing. */
+	clk_disable_unprepare(pfdev->clock);
+
+	return err;
+}
+
+static void panfrost_clk_fini(struct panfrost_device *pfdev)
+{
+	clk_disable_unprepare(pfdev->bus_clock); /* reverse of the enable order */
+	clk_disable_unprepare(pfdev->clock);
+}
+
+/* Build the supply table from comp->supply_names, then get and enable it. */
+static int panfrost_regulator_init(struct panfrost_device *pfdev)
+{
+	const struct panfrost_compatible *comp = pfdev->comp;
+	int ret, i;
+
+	pfdev->regulators = devm_kcalloc(pfdev->dev, comp->num_supplies,
+					 sizeof(*pfdev->regulators),
+					 GFP_KERNEL);
+	if (!pfdev->regulators)
+		return -ENOMEM;
+
+	for (i = 0; i < comp->num_supplies; i++)
+		pfdev->regulators[i].supply = comp->supply_names[i];
+
+	ret = devm_regulator_bulk_get(pfdev->dev, comp->num_supplies,
+				      pfdev->regulators);
+	if (ret < 0) {
+		if (ret != -EPROBE_DEFER)
+			dev_err(pfdev->dev, "failed to get regulators: %d\n",
+				ret);
+		return ret;
+	}
+
+	ret = regulator_bulk_enable(comp->num_supplies, pfdev->regulators);
+	if (ret < 0) {
+		dev_err(pfdev->dev, "failed to enable regulators: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Disable the supplies; no-op when no regulator table was allocated. */
+static void panfrost_regulator_fini(struct panfrost_device *pfdev)
+{
+	if (pfdev->regulators)
+		regulator_bulk_disable(pfdev->comp->num_supplies,
+				       pfdev->regulators);
+}
+
+/* Tear down every attached PM domain, stopping at the first empty slot. */
+static void panfrost_pm_domain_fini(struct panfrost_device *pfdev)
+{
+	int i = 0;
+
+	while (i < ARRAY_SIZE(pfdev->pm_domain_devs) && pfdev->pm_domain_devs[i]) {
+		/* The link may be missing if init failed right after attach. */
+		if (pfdev->pm_domain_links[i])
+			device_link_del(pfdev->pm_domain_links[i]);
+
+		dev_pm_domain_detach(pfdev->pm_domain_devs[i], true);
+		i++;
+	}
+}
+
+static int panfrost_pm_domain_init(struct panfrost_device *pfdev)
+{
+	int err;
+	int i, num_domains;
+
+	num_domains = of_count_phandle_with_args(pfdev->dev->of_node,
+						 "power-domains",
+						 "#power-domain-cells");
+
+	/*
+	 * A single domain is handled by the core and, if only a single
+	 * power domain is requested, the property is optional.
+	 */
+	if (num_domains < 2 && pfdev->comp->num_pm_domains < 2)
+		return 0;
+
+	if (num_domains != pfdev->comp->num_pm_domains) {
+		dev_err(pfdev->dev,
+			"Incorrect number of power domains: %d provided, %d needed\n",
+			num_domains, pfdev->comp->num_pm_domains);
+		return -EINVAL;
+	}
+
+	if (WARN(num_domains > ARRAY_SIZE(pfdev->pm_domain_devs),
+			"Too many supplies in compatible structure.\n"))
+		return -EINVAL;
+
+	for (i = 0; i < num_domains; i++) {
+		pfdev->pm_domain_devs[i] =
+			dev_pm_domain_attach_by_name(pfdev->dev,
+					pfdev->comp->pm_domain_names[i]);
+		if (IS_ERR_OR_NULL(pfdev->pm_domain_devs[i])) {
+			err = PTR_ERR(pfdev->pm_domain_devs[i]) ? : -ENODATA;
+			pfdev->pm_domain_devs[i] = NULL; /* fini stops at NULL */
+			dev_err(pfdev->dev,
+				"failed to get pm-domain %s(%d): %d\n",
+				pfdev->comp->pm_domain_names[i], i, err);
+			goto err;
+		}
+
+		pfdev->pm_domain_links[i] = device_link_add(pfdev->dev,
+				pfdev->pm_domain_devs[i], DL_FLAG_PM_RUNTIME |
+				DL_FLAG_STATELESS | DL_FLAG_RPM_ACTIVE);
+		if (!pfdev->pm_domain_links[i]) {
+			dev_err(pfdev->pm_domain_devs[i],
+				"adding device link failed!\n");
+			err = -ENODEV;
+			goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	panfrost_pm_domain_fini(pfdev); /* detach whatever was attached so far */
+	return err;
+}
+
+int panfrost_device_init(struct panfrost_device *pfdev)
+{
+	int err;
+
+	mutex_init(&pfdev->sched_lock);
+	INIT_LIST_HEAD(&pfdev->scheduled_jobs);
+	INIT_LIST_HEAD(&pfdev->as_lru_list);
+
+	spin_lock_init(&pfdev->as_lock);
+
+	err = panfrost_clk_init(pfdev);
+	if (err) {
+		dev_err(pfdev->dev, "clk init failed %d\n", err);
+		return err;
+	}
+
+	err = panfrost_devfreq_init(pfdev);
+	if (err) {
+		if (err != -EPROBE_DEFER)
+			dev_err(pfdev->dev, "devfreq init failed %d\n", err);
+		goto out_clk;
+	}
+
+	/* With an OPP table, OPP will handle regulators; otherwise do it here */
+	if (!pfdev->pfdevfreq.opp_of_table_added) {
+		err = panfrost_regulator_init(pfdev);
+		if (err)
+			goto out_devfreq;
+	}
+
+	err = panfrost_reset_init(pfdev);
+	if (err) {
+		dev_err(pfdev->dev, "reset init failed %d\n", err);
+		goto out_regulator;
+	}
+
+	err = panfrost_pm_domain_init(pfdev);
+	if (err)
+		goto out_reset;
+
+	pfdev->iomem = devm_platform_ioremap_resource(pfdev->pdev, 0);
+	if (IS_ERR(pfdev->iomem)) {
+		err = PTR_ERR(pfdev->iomem);
+		goto out_pm_domain;
+	}
+
+	err = panfrost_gpu_init(pfdev);
+	if (err)
+		goto out_pm_domain;
+
+	err = panfrost_mmu_init(pfdev);
+	if (err)
+		goto out_gpu;
+
+	err = panfrost_job_init(pfdev);
+	if (err)
+		goto out_mmu;
+
+	err = panfrost_perfcnt_init(pfdev);
+	if (err)
+		goto out_job;
+
+	return 0;
+out_job: /* error unwind: undo the completed steps in reverse order */
+	panfrost_job_fini(pfdev);
+out_mmu:
+	panfrost_mmu_fini(pfdev);
+out_gpu:
+	panfrost_gpu_fini(pfdev);
+out_pm_domain:
+	panfrost_pm_domain_fini(pfdev);
+out_reset:
+	panfrost_reset_fini(pfdev);
+out_regulator:
+	panfrost_regulator_fini(pfdev); /* no-op if regulators were never set up */
+out_devfreq:
+	panfrost_devfreq_fini(pfdev);
+out_clk:
+	panfrost_clk_fini(pfdev);
+	return err;
+}
+
+void panfrost_device_fini(struct panfrost_device *pfdev)
+{
+	panfrost_perfcnt_fini(pfdev); /* last thing initialised, first torn down */
+	panfrost_job_fini(pfdev);
+	panfrost_mmu_fini(pfdev);
+	panfrost_gpu_fini(pfdev);
+	panfrost_pm_domain_fini(pfdev);
+	panfrost_reset_fini(pfdev);
+	panfrost_devfreq_fini(pfdev); /* NOTE(review): init's unwind runs this after regulator_fini */
+	panfrost_regulator_fini(pfdev);
+	panfrost_clk_fini(pfdev);
+}
+
+#define PANFROST_EXCEPTION(id) \
+	[DRM_PANFROST_EXCEPTION_ ## id] = { \
+		.name = #id, \
+	} /* one table slot: index is the enum value, .name is "id" as text */
+
+struct panfrost_exception_info {
+	const char *name; /* stringified id; NULL in slots that were never set */
+};
+
+static const struct panfrost_exception_info panfrost_exception_infos[] = {
+	PANFROST_EXCEPTION(OK),
+	PANFROST_EXCEPTION(DONE),
+	PANFROST_EXCEPTION(INTERRUPTED),
+	PANFROST_EXCEPTION(STOPPED),
+	PANFROST_EXCEPTION(TERMINATED),
+	PANFROST_EXCEPTION(KABOOM),
+	PANFROST_EXCEPTION(EUREKA),
+	PANFROST_EXCEPTION(ACTIVE),
+	PANFROST_EXCEPTION(JOB_CONFIG_FAULT),
+	PANFROST_EXCEPTION(JOB_POWER_FAULT),
+	PANFROST_EXCEPTION(JOB_READ_FAULT),
+	PANFROST_EXCEPTION(JOB_WRITE_FAULT),
+	PANFROST_EXCEPTION(JOB_AFFINITY_FAULT),
+	PANFROST_EXCEPTION(JOB_BUS_FAULT),
+	PANFROST_EXCEPTION(INSTR_INVALID_PC),
+	PANFROST_EXCEPTION(INSTR_INVALID_ENC),
+	PANFROST_EXCEPTION(INSTR_TYPE_MISMATCH),
+	PANFROST_EXCEPTION(INSTR_OPERAND_FAULT),
+	PANFROST_EXCEPTION(INSTR_TLS_FAULT),
+	PANFROST_EXCEPTION(INSTR_BARRIER_FAULT),
+	PANFROST_EXCEPTION(INSTR_ALIGN_FAULT),
+	PANFROST_EXCEPTION(DATA_INVALID_FAULT),
+	PANFROST_EXCEPTION(TILE_RANGE_FAULT),
+	PANFROST_EXCEPTION(ADDR_RANGE_FAULT),
+	PANFROST_EXCEPTION(IMPRECISE_FAULT),
+	PANFROST_EXCEPTION(OOM),
+	PANFROST_EXCEPTION(OOM_AFBC),
+	PANFROST_EXCEPTION(UNKNOWN),
+	PANFROST_EXCEPTION(DELAYED_BUS_FAULT),
+	PANFROST_EXCEPTION(GPU_SHAREABILITY_FAULT),
+	PANFROST_EXCEPTION(SYS_SHAREABILITY_FAULT),
+	PANFROST_EXCEPTION(GPU_CACHEABILITY_FAULT),
+	PANFROST_EXCEPTION(TRANSLATION_FAULT_0),
+	PANFROST_EXCEPTION(TRANSLATION_FAULT_1),
+	PANFROST_EXCEPTION(TRANSLATION_FAULT_2),
+	PANFROST_EXCEPTION(TRANSLATION_FAULT_3),
+	PANFROST_EXCEPTION(TRANSLATION_FAULT_4),
+	PANFROST_EXCEPTION(TRANSLATION_FAULT_IDENTITY),
+	PANFROST_EXCEPTION(PERM_FAULT_0),
+	PANFROST_EXCEPTION(PERM_FAULT_1),
+	PANFROST_EXCEPTION(PERM_FAULT_2),
+	PANFROST_EXCEPTION(PERM_FAULT_3),
+	PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_0),
+	PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_1),
+	PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_2),
+	PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_3),
+	PANFROST_EXCEPTION(ACCESS_FLAG_0),
+	PANFROST_EXCEPTION(ACCESS_FLAG_1),
+	PANFROST_EXCEPTION(ACCESS_FLAG_2),
+	PANFROST_EXCEPTION(ACCESS_FLAG_3),
+	PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN0),
+	PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN1),
+	PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN2),
+	PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN3),
+	PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT0),
+	PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT1),
+	PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT2),
+	PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT3),
+	PANFROST_EXCEPTION(MEM_ATTR_FAULT_0),
+	PANFROST_EXCEPTION(MEM_ATTR_FAULT_1),
+	PANFROST_EXCEPTION(MEM_ATTR_FAULT_2),
+	PANFROST_EXCEPTION(MEM_ATTR_FAULT_3),
+	PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_0),
+	PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_1),
+	PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_2),
+	PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_3),
+}; /* sparse table: codes not listed above are left with a NULL .name */
+
+/* Map an exception code to its name; WARNs and returns a stub if unknown. */
+const char *panfrost_exception_name(u32 exception_code)
+{
+	const char *name = exception_code < ARRAY_SIZE(panfrost_exception_infos) ?
+			   panfrost_exception_infos[exception_code].name : NULL;
+
+	return WARN_ON(!name) ? "Unknown exception type" : name;
+}
+
+/* Tell whether the GPU must be reset after the given exception. */
+bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev,
+				    u32 exception_code)
+{
+	/*
+	 * On GPUs with HW_ISSUE_TTRX_3076, a bus fault caused by an occlusion
+	 * query write may make later fragment jobs hang, so reset as a
+	 * workaround. No other exception on supported GPUs needs a reset.
+	 */
+	return exception_code == DRM_PANFROST_EXCEPTION_JOB_BUS_FAULT &&
+	       panfrost_has_hw_issue(pfdev, HW_ISSUE_TTRX_3076);
+}
+
+void panfrost_device_reset(struct panfrost_device *pfdev)
+{
+	panfrost_gpu_soft_reset(pfdev); /* reset first ... */
+
+	panfrost_gpu_power_on(pfdev); /* ... then power, MMU and job IRQs */
+	panfrost_mmu_reset(pfdev);
+	panfrost_job_enable_interrupts(pfdev);
+}
+
+/* Runtime-PM resume: reset the GPU, then restart devfreq. Always returns 0. */
+static int panfrost_device_resume(struct device *dev)
+{
+	struct panfrost_device *gpu = dev_get_drvdata(dev);
+
+	panfrost_device_reset(gpu);
+	panfrost_devfreq_resume(gpu);
+	return 0;
+}
+
+/* Runtime-PM suspend: -EBUSY unless panfrost_job_is_idle() reports idle. */
+static int panfrost_device_suspend(struct device *dev)
+{
+	struct panfrost_device *gpu = dev_get_drvdata(dev);
+
+	if (panfrost_job_is_idle(gpu)) {
+		panfrost_devfreq_suspend(gpu);
+		panfrost_gpu_power_off(gpu);
+		return 0;
+	}
+	return -EBUSY;
+}
+
+EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend,
+			      panfrost_device_resume, NULL);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
new file mode 100644
index 0000000000..b0126b9fba
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#ifndef __PANFROST_DEVICE_H__
+#define __PANFROST_DEVICE_H__
+
+#include <linux/atomic.h>
+#include <linux/io-pgtable.h>
+#include <linux/pm.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spinlock.h>
+#include <drm/drm_device.h>
+#include <drm/drm_mm.h>
+#include <drm/gpu_scheduler.h>
+
+#include "panfrost_devfreq.h"
+
+struct panfrost_device;
+struct panfrost_mmu;
+struct panfrost_job_slot;
+struct panfrost_job;
+struct panfrost_perfcnt;
+
+#define NUM_JOB_SLOTS 3
+#define MAX_PM_DOMAINS 5
+
+struct panfrost_features {
+	u16 id; /* GPU product id; matched by panfrost_model_cmp() */
+	u16 revision;
+
+	u64 shader_present;
+	u64 tiler_present;
+	u64 l2_present;
+	u64 stack_present;
+	u32 as_present;
+	u32 js_present;
+
+	u32 l2_features;
+	u32 core_features;
+	u32 tiler_features;
+	u32 mem_features;
+	u32 mmu_features;
+	u32 thread_features;
+	u32 max_threads;
+	u32 thread_max_workgroup_sz;
+	u32 thread_max_barrier_sz;
+	u32 coherency_features;
+	u32 afbc_features;
+	u32 texture_features[4];
+	u32 js_features[16];
+
+	u32 nr_core_groups;
+	u32 thread_tls_alloc;
+
+	unsigned long hw_features[64 / BITS_PER_LONG]; /* 64 bits of flags */
+	unsigned long hw_issues[64 / BITS_PER_LONG]; /* 64 bits of flags */
+};
+
+/*
+ * Features that cannot be automatically detected and need matching using the
+ * compatible string, typically SoC-specific.
+ */
+struct panfrost_compatible {
+	/* Supplies count and names. */
+	int num_supplies; /* devfreq is skipped when this is > 1 */
+	const char * const *supply_names; /* num_supplies entries are read */
+	/*
+	 * Number of power domains required, note that values 0 and 1 are
+	 * handled identically, as only values > 1 need special handling.
+	 */
+	int num_pm_domains;
+	/* Only required if num_pm_domains > 1. */
+	const char * const *pm_domain_names;
+
+	/* Vendor implementation quirks callback */
+	void (*vendor_quirk)(struct panfrost_device *pfdev);
+};
+
+struct panfrost_device {
+	struct device *dev;
+	struct drm_device *ddev;
+	struct platform_device *pdev;
+
+	void __iomem *iomem; /* mapping of platform resource 0 */
+	struct clk *clock; /* main GPU clock, always required */
+	struct clk *bus_clock; /* optional "bus" clock, may be NULL */
+	struct regulator_bulk_data *regulators; /* NULL when OPP handles supplies */
+	struct reset_control *rstc; /* optional reset control array */
+	/* pm_domains for devices with more than one. */
+	struct device *pm_domain_devs[MAX_PM_DOMAINS];
+	struct device_link *pm_domain_links[MAX_PM_DOMAINS];
+	bool coherent;
+
+	struct panfrost_features features;
+	const struct panfrost_compatible *comp; /* per-compatible data */
+
+	spinlock_t as_lock;
+	unsigned long as_in_use_mask;
+	unsigned long as_alloc_mask;
+	unsigned long as_faulty_mask;
+	struct list_head as_lru_list;
+
+	struct panfrost_job_slot *js;
+
+	struct panfrost_job *jobs[NUM_JOB_SLOTS][2];
+	struct list_head scheduled_jobs;
+
+	struct panfrost_perfcnt *perfcnt;
+
+	struct mutex sched_lock;
+
+	struct {
+		struct workqueue_struct *wq;
+		struct work_struct work; /* queued on wq by schedule_reset() */
+		atomic_t pending; /* set to 1 by panfrost_device_schedule_reset() */
+	} reset;
+
+	struct mutex shrinker_lock;
+	struct list_head shrinker_list;
+	struct shrinker shrinker;
+
+	struct panfrost_devfreq pfdevfreq; /* devfreq and utilization state */
+};
+
+struct panfrost_mmu {
+	struct panfrost_device *pfdev;
+	struct kref refcount;
+	struct io_pgtable_cfg pgtbl_cfg;
+	struct io_pgtable_ops *pgtbl_ops;
+	struct drm_mm mm;
+	spinlock_t mm_lock; /* presumably guards mm - confirm against users */
+	int as; /* NOTE(review): looks like an address-space index - confirm */
+	atomic_t as_count;
+	struct list_head list; /* presumably on pfdev->as_lru_list - confirm */
+};
+
+struct panfrost_file_priv {
+	struct panfrost_device *pfdev;
+
+	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS]; /* one per job slot */
+
+	struct panfrost_mmu *mmu;
+};
+
+static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
+{
+	return ddev->dev_private; /* dev_private is returned as the panfrost_device */
+}
+
+/* Compare the GPU product id with @id: negative, zero or positive result. */
+static inline int panfrost_model_cmp(struct panfrost_device *pfdev, s32 id)
+{
+	s32 model = pfdev->features.id;
+
+	/* Ids with any of bits 15:12 set are compared on those and bits 3:0. */
+	return ((model & 0xf000) ? (model & 0xf00f) : model) - id;
+}
+
+static inline bool panfrost_model_is_bifrost(struct panfrost_device *pfdev)
+{
+	return panfrost_model_cmp(pfdev, 0x1000) >= 0; /* masked id >= 0x1000 */
+}
+
+static inline bool panfrost_model_eq(struct panfrost_device *pfdev, s32 id)
+{
+	return !panfrost_model_cmp(pfdev, id); /* true when the compare yields 0 */
+}
+
+int panfrost_unstable_ioctl_check(void);
+
+int panfrost_device_init(struct panfrost_device *pfdev);
+void panfrost_device_fini(struct panfrost_device *pfdev);
+void panfrost_device_reset(struct panfrost_device *pfdev);
+
+extern const struct dev_pm_ops panfrost_pm_ops;
+
+enum drm_panfrost_exception_type {
+	DRM_PANFROST_EXCEPTION_OK = 0x00,
+	DRM_PANFROST_EXCEPTION_DONE = 0x01,
+	DRM_PANFROST_EXCEPTION_INTERRUPTED = 0x02,
+	DRM_PANFROST_EXCEPTION_STOPPED = 0x03,
+	DRM_PANFROST_EXCEPTION_TERMINATED = 0x04,
+	DRM_PANFROST_EXCEPTION_KABOOM = 0x05,
+	DRM_PANFROST_EXCEPTION_EUREKA = 0x06,
+	DRM_PANFROST_EXCEPTION_ACTIVE = 0x08,
+	DRM_PANFROST_EXCEPTION_MAX_NON_FAULT = 0x3f, /* codes above this are faults */
+	DRM_PANFROST_EXCEPTION_JOB_CONFIG_FAULT = 0x40,
+	DRM_PANFROST_EXCEPTION_JOB_POWER_FAULT = 0x41,
+	DRM_PANFROST_EXCEPTION_JOB_READ_FAULT = 0x42,
+	DRM_PANFROST_EXCEPTION_JOB_WRITE_FAULT = 0x43,
+	DRM_PANFROST_EXCEPTION_JOB_AFFINITY_FAULT = 0x44,
+	DRM_PANFROST_EXCEPTION_JOB_BUS_FAULT = 0x48,
+	DRM_PANFROST_EXCEPTION_INSTR_INVALID_PC = 0x50,
+	DRM_PANFROST_EXCEPTION_INSTR_INVALID_ENC = 0x51,
+	DRM_PANFROST_EXCEPTION_INSTR_TYPE_MISMATCH = 0x52,
+	DRM_PANFROST_EXCEPTION_INSTR_OPERAND_FAULT = 0x53,
+	DRM_PANFROST_EXCEPTION_INSTR_TLS_FAULT = 0x54,
+	DRM_PANFROST_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
+	DRM_PANFROST_EXCEPTION_INSTR_ALIGN_FAULT = 0x56,
+	DRM_PANFROST_EXCEPTION_DATA_INVALID_FAULT = 0x58,
+	DRM_PANFROST_EXCEPTION_TILE_RANGE_FAULT = 0x59,
+	DRM_PANFROST_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
+	DRM_PANFROST_EXCEPTION_IMPRECISE_FAULT = 0x5b,
+	DRM_PANFROST_EXCEPTION_OOM = 0x60,
+	DRM_PANFROST_EXCEPTION_OOM_AFBC = 0x61,
+	DRM_PANFROST_EXCEPTION_UNKNOWN = 0x7f,
+	DRM_PANFROST_EXCEPTION_DELAYED_BUS_FAULT = 0x80,
+	DRM_PANFROST_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
+	DRM_PANFROST_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
+	DRM_PANFROST_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
+	DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
+	DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
+	DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
+	DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
+	DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
+	DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_IDENTITY = 0xc7,
+	DRM_PANFROST_EXCEPTION_PERM_FAULT_0 = 0xc8,
+	DRM_PANFROST_EXCEPTION_PERM_FAULT_1 = 0xc9,
+	DRM_PANFROST_EXCEPTION_PERM_FAULT_2 = 0xca,
+	DRM_PANFROST_EXCEPTION_PERM_FAULT_3 = 0xcb,
+	DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_0 = 0xd0,
+	DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_1 = 0xd1,
+	DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_2 = 0xd2,
+	DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_3 = 0xd3,
+	DRM_PANFROST_EXCEPTION_ACCESS_FLAG_0 = 0xd8,
+	DRM_PANFROST_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
+	DRM_PANFROST_EXCEPTION_ACCESS_FLAG_2 = 0xda,
+	DRM_PANFROST_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
+	DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN0 = 0xe0,
+	DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN1 = 0xe1,
+	DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN2 = 0xe2,
+	DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN3 = 0xe3,
+	DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
+	DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
+	DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
+	DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
+	DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
+	DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
+	DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
+	DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
+	DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_0 = 0xec,
+	DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_1 = 0xed,
+	DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_2 = 0xee,
+	DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_3 = 0xef,
+}; /* the values double as indices into panfrost_exception_infos[] */
+
+static inline bool
+panfrost_exception_is_fault(u32 exception_code)
+{
+	return exception_code > DRM_PANFROST_EXCEPTION_MAX_NON_FAULT; /* > 0x3f */
+}
+
+const char *panfrost_exception_name(u32 exception_code);
+bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev,
+				    u32 exception_code);
+
+static inline void
+panfrost_device_schedule_reset(struct panfrost_device *pfdev)
+{
+	atomic_set(&pfdev->reset.pending, 1); /* flag is set before queueing */
+	queue_work(pfdev->reset.wq, &pfdev->reset.work); /* reset runs from wq */
+}
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
new file mode 100644
index 0000000000..a2ab99698c
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -0,0 +1,733 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pagemap.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <drm/panfrost_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_syncobj.h>
+#include <drm/drm_utils.h>
+
+#include "panfrost_device.h"
+#include "panfrost_gem.h"
+#include "panfrost_mmu.h"
+#include "panfrost_job.h"
+#include "panfrost_gpu.h"
+#include "panfrost_perfcnt.h"
+
+/*
+ * Gate consulted by panfrost_unstable_ioctl_check(). Off by default (static
+ * storage is zero-initialized); exposed as an "unsafe" module parameter with
+ * mode 0600.
+ */
+static bool unstable_ioctls;
+module_param_unsafe(unstable_ioctls, bool, 0600);
+
+/*
+ * GET_PARAM ioctl: copy one field of pfdev->features into param->value.
+ *
+ * Returns 0 on success, or -EINVAL when param->pad is non-zero or
+ * param->param does not match any of the cases below.
+ */
+static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct drm_file *file)
+{
+	struct drm_panfrost_get_param *param = data;
+	struct panfrost_device *pfdev = ddev->dev_private;
+
+	if (param->pad != 0)
+		return -EINVAL;
+
+/* Emit a switch case returning the scalar pfdev->features.<member> */
+#define PANFROST_FEATURE(name, member)			\
+	case DRM_PANFROST_PARAM_ ## name:		\
+		param->value = pfdev->features.member;	\
+		break
+/*
+ * Emit a case range (GNU "case A ... B" extension) covering
+ * DRM_PANFROST_PARAM_<name>0 up to DRM_PANFROST_PARAM_<name><max>; the
+ * array index is the distance of the requested parameter from <name>0.
+ */
+#define PANFROST_FEATURE_ARRAY(name, member, max)			\
+	case DRM_PANFROST_PARAM_ ## name ## 0 ...			\
+		DRM_PANFROST_PARAM_ ## name ## max:			\
+		param->value = pfdev->features.member[param->param -	\
+			DRM_PANFROST_PARAM_ ## name ## 0];		\
+		break
+
+	switch (param->param) {
+		PANFROST_FEATURE(GPU_PROD_ID, id);
+		PANFROST_FEATURE(GPU_REVISION, revision);
+		PANFROST_FEATURE(SHADER_PRESENT, shader_present);
+		PANFROST_FEATURE(TILER_PRESENT, tiler_present);
+		PANFROST_FEATURE(L2_PRESENT, l2_present);
+		PANFROST_FEATURE(STACK_PRESENT, stack_present);
+		PANFROST_FEATURE(AS_PRESENT, as_present);
+		PANFROST_FEATURE(JS_PRESENT, js_present);
+		PANFROST_FEATURE(L2_FEATURES, l2_features);
+		PANFROST_FEATURE(CORE_FEATURES, core_features);
+		PANFROST_FEATURE(TILER_FEATURES, tiler_features);
+		PANFROST_FEATURE(MEM_FEATURES, mem_features);
+		PANFROST_FEATURE(MMU_FEATURES, mmu_features);
+		PANFROST_FEATURE(THREAD_FEATURES, thread_features);
+		PANFROST_FEATURE(MAX_THREADS, max_threads);
+		PANFROST_FEATURE(THREAD_MAX_WORKGROUP_SZ,
+				thread_max_workgroup_sz);
+		PANFROST_FEATURE(THREAD_MAX_BARRIER_SZ,
+				thread_max_barrier_sz);
+		PANFROST_FEATURE(COHERENCY_FEATURES, coherency_features);
+		PANFROST_FEATURE(AFBC_FEATURES, afbc_features);
+		PANFROST_FEATURE_ARRAY(TEXTURE_FEATURES, texture_features, 3);
+		PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15);
+		PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups);
+		PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc);
+	default:
+		/* Unknown parameter id */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * CREATE_BO ioctl: create a GEM object of args->size bytes, publish a handle
+ * for it in args->handle and report the GPU offset of the caller's mapping
+ * in args->offset.
+ *
+ * Returns 0 on success, -EINVAL on invalid arguments or when the mapping
+ * cannot be found, or the error returned by the creation helpers.
+ */
+static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_panfrost_create_bo *args = data;
+	struct panfrost_file_priv *priv = file->driver_priv;
+	struct panfrost_gem_mapping *mapping;
+	struct panfrost_gem_object *bo;
+	int ret;
+
+	if (!args->size || args->pad)
+		return -EINVAL;
+
+	/* Reject any flag other than NOEXEC and HEAP */
+	if (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))
+		return -EINVAL;
+
+	/* Heaps should never be executable */
+	if (args->flags & PANFROST_BO_HEAP) {
+		if (!(args->flags & PANFROST_BO_NOEXEC))
+			return -EINVAL;
+	}
+
+	bo = panfrost_gem_create(dev, args->size, args->flags);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
+	if (ret)
+		goto out_put;
+
+	mapping = panfrost_gem_mapping_get(bo, priv);
+	if (!mapping) {
+		/* This can only happen if the handle from
+		 * drm_gem_handle_create() has already been guessed and freed
+		 * by user space
+		 */
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	args->offset = mapping->mmnode.start << PAGE_SHIFT;
+	panfrost_gem_mapping_put(mapping);
+
+out_put:
+	/* Drop the local reference on both the success and the error path */
+	drm_gem_object_put(&bo->base.base);
+	return ret;
+}
+
+/**
+ * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @args: IOCTL args
+ * @job: job being set up
+ *
+ * Resolve handles from userspace to BOs and attach them to job, then grab
+ * the mapping of each BO in the caller's address space.
+ *
+ * Note that this function doesn't need to unreference the BOs on
+ * failure, because that will happen at panfrost_job_cleanup() time.
+ *
+ * Return: 0 on success, -ENOMEM if the mappings array cannot be allocated,
+ * -EINVAL if a BO has no mapping for this file, or the error returned by
+ * drm_gem_objects_lookup().
+ */
+static int
+panfrost_lookup_bos(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct drm_panfrost_submit *args,
+		  struct panfrost_job *job)
+{
+	struct panfrost_file_priv *priv = file_priv->driver_priv;
+	unsigned int idx;
+	int err;
+
+	job->bo_count = args->bo_handle_count;
+	if (job->bo_count == 0)
+		return 0;
+
+	err = drm_gem_objects_lookup(file_priv,
+				     (void __user *)(uintptr_t)args->bo_handles,
+				     job->bo_count, &job->bos);
+	if (err)
+		return err;
+
+	/* Zeroed, so slots not reached by the loop below stay NULL */
+	job->mappings = kvmalloc_array(job->bo_count,
+				       sizeof(struct panfrost_gem_mapping *),
+				       GFP_KERNEL | __GFP_ZERO);
+	if (!job->mappings)
+		return -ENOMEM;
+
+	for (idx = 0; idx < job->bo_count; idx++) {
+		struct panfrost_gem_object *bo = to_panfrost_bo(job->bos[idx]);
+		struct panfrost_gem_mapping *mapping;
+
+		mapping = panfrost_gem_mapping_get(bo, priv);
+		if (!mapping)
+			return -EINVAL;
+
+		atomic_inc(&bo->gpu_usecount);
+		job->mappings[idx] = mapping;
+	}
+
+	return 0;
+}
+
+/**
+ * panfrost_copy_in_sync() - Sets up job->deps with the sync objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @args: IOCTL args
+ * @job: job being set up
+ *
+ * Copy the syncobj handles in from userspace and add each of them as a
+ * dependency of the scheduler job.
+ *
+ * Note that this function doesn't need to unreference the fences on
+ * failure, because that will happen at panfrost_job_cleanup() time.
+ *
+ * Return: 0 on success, -ENOMEM or -EFAULT if the handles cannot be
+ * allocated or copied, or the error returned by
+ * drm_sched_job_add_syncobj_dependency().
+ */
+static int
+panfrost_copy_in_sync(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct drm_panfrost_submit *args,
+		  struct panfrost_job *job)
+{
+	int in_fence_count = args->in_sync_count;
+	u32 *handles;
+	int ret = 0;
+	int idx;
+
+	if (in_fence_count == 0)
+		return 0;
+
+	handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL);
+	if (!handles) {
+		DRM_DEBUG("Failed to allocate incoming syncobj handles\n");
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(handles,
+			   (void __user *)(uintptr_t)args->in_syncs,
+			   in_fence_count * sizeof(u32))) {
+		DRM_DEBUG("Failed to copy in syncobj handles\n");
+		ret = -EFAULT;
+		goto out_free;
+	}
+
+	/* Stop at the first handle that cannot be added as a dependency */
+	for (idx = 0; idx < in_fence_count; idx++) {
+		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv,
+							   handles[idx], 0);
+		if (ret)
+			break;
+	}
+
+out_free:
+	kvfree(handles);
+	return ret;
+}
+
+/*
+ * SUBMIT ioctl: build a panfrost_job from the userspace arguments and push
+ * it to the scheduler.
+ *
+ * Steps: validate args, optionally look up the out_sync syncobj, allocate
+ * and fill the job, initialize the scheduler job on the entity matching the
+ * job's slot, attach the in-sync dependencies and the BOs, then push the
+ * job. On success the fence of the out_sync syncobj (if any) is replaced by
+ * job->render_done_fence.
+ *
+ * Returns 0 on success, -EINVAL for a zero jc or unsupported requirements,
+ * -ENODEV when out_sync cannot be found, -ENOMEM when the job cannot be
+ * allocated, or the error of the first failing helper.
+ */
+static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_file_priv *file_priv = file->driver_priv;
+	struct drm_panfrost_submit *args = data;
+	struct drm_syncobj *sync_out = NULL;
+	struct panfrost_job *job;
+	int ret = 0, slot;
+
+	if (!args->jc)
+		return -EINVAL;
+
+	/* The only accepted non-zero requirements value is PANFROST_JD_REQ_FS */
+	if (args->requirements && args->requirements != PANFROST_JD_REQ_FS)
+		return -EINVAL;
+
+	if (args->out_sync > 0) {
+		sync_out = drm_syncobj_find(file, args->out_sync);
+		if (!sync_out)
+			return -ENODEV;
+	}
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job) {
+		ret = -ENOMEM;
+		goto out_put_syncout;
+	}
+
+	/* Initial reference, dropped by panfrost_job_put() at the end */
+	kref_init(&job->refcount);
+
+	job->pfdev = pfdev;
+	job->jc = args->jc;
+	job->requirements = args->requirements;
+	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
+	job->mmu = file_priv->mmu;
+
+	slot = panfrost_job_get_slot(job);
+
+	ret = drm_sched_job_init(&job->base,
+				 &file_priv->sched_entity[slot],
+				 NULL);
+	if (ret)
+		goto out_put_job;
+
+	ret = panfrost_copy_in_sync(dev, file, args, job);
+	if (ret)
+		goto out_cleanup_job;
+
+	ret = panfrost_lookup_bos(dev, file, args, job);
+	if (ret)
+		goto out_cleanup_job;
+
+	ret = panfrost_job_push(job);
+	if (ret)
+		goto out_cleanup_job;
+
+	/* Update the return sync object for the job */
+	if (sync_out)
+		drm_syncobj_replace_fence(sync_out, job->render_done_fence);
+
+out_cleanup_job:
+	/* The success path falls through here too, hence the check on ret */
+	if (ret)
+		drm_sched_job_cleanup(&job->base);
+out_put_job:
+	panfrost_job_put(job);
+out_put_syncout:
+	if (sync_out)
+		drm_syncobj_put(sync_out);
+
+	return ret;
+}
+
+/*
+ * WAIT_BO ioctl: wait (interruptibly) on the reservation object of the BO
+ * designated by args->handle, until the absolute timeout args->timeout_ns.
+ *
+ * Returns -EINVAL if args->pad is set, -ENOENT if the handle is unknown,
+ * -ETIMEDOUT (non-zero timeout) or -EBUSY (zero timeout) when the wait
+ * returns 0, and otherwise the value returned by dma_resv_wait_timeout().
+ * NOTE(review): a positive wait result is passed through unchanged rather
+ * than being normalized to 0 - confirm userspace expects that.
+ */
+static int
+panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
+{
+	struct drm_panfrost_wait_bo *args = data;
+	unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+	struct drm_gem_object *gem_obj;
+	long ret;
+
+	if (args->pad)
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj)
+		return -ENOENT;
+
+	ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ,
+				    true, timeout);
+	if (ret == 0) {
+		/* Nothing left of the timeout: tell a poll from a real wait */
+		if (timeout)
+			ret = -ETIMEDOUT;
+		else
+			ret = -EBUSY;
+	}
+
+	drm_gem_object_put(gem_obj);
+
+	return ret;
+}
+
+/*
+ * MMAP_BO ioctl: create the mmap offset of the BO designated by
+ * args->handle and report it in args->offset.
+ *
+ * Returns 0 on success, -EINVAL for non-zero flags or a heap BO, -ENOENT
+ * for an unknown handle, or the error of drm_gem_create_mmap_offset().
+ */
+static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct drm_panfrost_mmap_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	int ret;
+
+	if (args->flags != 0) {
+		DRM_INFO("unknown mmap_bo flags: %d\n", args->flags);
+		return -EINVAL;
+	}
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+
+	if (to_panfrost_bo(gem_obj)->is_heap) {
+		/* Don't allow mmapping of heap objects as pages are not pinned. */
+		ret = -EINVAL;
+	} else {
+		ret = drm_gem_create_mmap_offset(gem_obj);
+		if (!ret)
+			args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+	}
+
+	drm_gem_object_put(gem_obj);
+	return ret;
+}
+
+/*
+ * GET_BO_OFFSET ioctl: report in args->offset the start of the mapping that
+ * the BO designated by args->handle has in the caller's address space.
+ *
+ * Returns 0 on success, -ENOENT for an unknown handle, or -EINVAL when the
+ * BO has no mapping for this file.
+ */
+static int panfrost_ioctl_get_bo_offset(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv)
+{
+	struct drm_panfrost_get_bo_offset *args = data;
+	struct panfrost_file_priv *priv = file_priv->driver_priv;
+	struct panfrost_gem_mapping *mapping;
+	struct drm_gem_object *gem_obj;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+
+	/* The mapping holds its own reference, so the BO can be put right away */
+	mapping = panfrost_gem_mapping_get(to_panfrost_bo(gem_obj), priv);
+	drm_gem_object_put(gem_obj);
+	if (!mapping)
+		return -EINVAL;
+
+	args->offset = mapping->mmnode.start << PAGE_SHIFT;
+	panfrost_gem_mapping_put(mapping);
+
+	return 0;
+}
+
+/*
+ * MADVISE ioctl: apply args->madv to the BO designated by args->handle and
+ * report the result of drm_gem_shmem_madvise() in args->retained.
+ *
+ * Locks are taken in this order: the BO reservation lock (interruptible),
+ * pfdev->shrinker_lock, then bo->mappings.lock; they are released in the
+ * reverse order.
+ *
+ * Returns 0 on success, -ENOENT for an unknown handle, -EINVAL when
+ * DONTNEED is requested on a BO that does not have exactly one mapping
+ * owned by the caller, or the error of dma_resv_lock_interruptible().
+ */
+static int panfrost_ioctl_madvise(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv)
+{
+	struct panfrost_file_priv *priv = file_priv->driver_priv;
+	struct drm_panfrost_madvise *args = data;
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct drm_gem_object *gem_obj;
+	struct panfrost_gem_object *bo;
+	int ret = 0;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+
+	bo = to_panfrost_bo(gem_obj);
+
+	ret = dma_resv_lock_interruptible(bo->base.base.resv, NULL);
+	if (ret)
+		goto out_put_object;
+
+	mutex_lock(&pfdev->shrinker_lock);
+	mutex_lock(&bo->mappings.lock);
+	if (args->madv == PANFROST_MADV_DONTNEED) {
+		struct panfrost_gem_mapping *first;
+
+		/*
+		 * Only dereferenced below once the list is known to hold
+		 * exactly one entry (the || short-circuits otherwise).
+		 */
+		first = list_first_entry(&bo->mappings.list,
+					 struct panfrost_gem_mapping,
+					 node);
+
+		/*
+		 * If we want to mark the BO purgeable, there must be only one
+		 * user: the caller FD.
+		 * We could do something smarter and mark the BO purgeable only
+		 * when all its users have marked it purgeable, but globally
+		 * visible/shared BOs are likely to never be marked purgeable
+		 * anyway, so let's not bother.
+		 */
+		if (!list_is_singular(&bo->mappings.list) ||
+		    WARN_ON_ONCE(first->mmu != priv->mmu)) {
+			ret = -EINVAL;
+			goto out_unlock_mappings;
+		}
+	}
+
+	args->retained = drm_gem_shmem_madvise(&bo->base, args->madv);
+
+	/*
+	 * Keep the shrinker list in sync: a retained DONTNEED BO goes to the
+	 * tail of the list, a retained WILLNEED BO is taken off it.
+	 */
+	if (args->retained) {
+		if (args->madv == PANFROST_MADV_DONTNEED)
+			list_move_tail(&bo->base.madv_list,
+				       &pfdev->shrinker_list);
+		else if (args->madv == PANFROST_MADV_WILLNEED)
+			list_del_init(&bo->base.madv_list);
+	}
+
+out_unlock_mappings:
+	mutex_unlock(&bo->mappings.lock);
+	mutex_unlock(&pfdev->shrinker_lock);
+	dma_resv_unlock(bo->base.base.resv);
+out_put_object:
+	drm_gem_object_put(gem_obj);
+	return ret;
+}
+
+/*
+ * Report whether unstable ioctls are enabled: 0 when the unstable_ioctls
+ * module parameter is set, -ENOSYS otherwise.
+ */
+int panfrost_unstable_ioctl_check(void)
+{
+	return unstable_ioctls ? 0 : -ENOSYS;
+}
+
+/*
+ * DRM .open hook: allocate the per-file private data, create its MMU
+ * context and open the job side for it.
+ *
+ * Returns 0 on success, -ENOMEM if the private data cannot be allocated, or
+ * the error of panfrost_mmu_ctx_create() / panfrost_job_open(). On failure
+ * everything set up so far is undone and file->driver_priv is reset to NULL
+ * so that the DRM file never keeps a pointer to freed memory.
+ */
+static int
+panfrost_open(struct drm_device *dev, struct drm_file *file)
+{
+	int ret;
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_file_priv *panfrost_priv;
+
+	panfrost_priv = kzalloc(sizeof(*panfrost_priv), GFP_KERNEL);
+	if (!panfrost_priv)
+		return -ENOMEM;
+
+	panfrost_priv->pfdev = pfdev;
+	file->driver_priv = panfrost_priv;
+
+	panfrost_priv->mmu = panfrost_mmu_ctx_create(pfdev);
+	if (IS_ERR(panfrost_priv->mmu)) {
+		ret = PTR_ERR(panfrost_priv->mmu);
+		goto err_free;
+	}
+
+	ret = panfrost_job_open(panfrost_priv);
+	if (ret)
+		goto err_job;
+
+	return 0;
+
+err_job:
+	panfrost_mmu_ctx_put(panfrost_priv->mmu);
+err_free:
+	/* Do not leave a dangling pointer behind in the DRM file */
+	file->driver_priv = NULL;
+	kfree(panfrost_priv);
+	return ret;
+}
+
+/*
+ * DRM .postclose hook: close the perfcnt and job state of the file, drop
+ * its MMU context reference and free the per-file private data.
+ */
+static void
+panfrost_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
+
+	panfrost_perfcnt_close(file);
+	panfrost_job_close(panfrost_priv);
+
+	panfrost_mmu_ctx_put(panfrost_priv->mmu);
+	kfree(panfrost_priv);
+}
+
+/*
+ * Driver ioctl table. Each entry binds DRM_IOCTL_PANFROST_<n> to
+ * panfrost_ioctl_<func>(); every entry carries DRM_RENDER_ALLOW.
+ */
+static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
+#define PANFROST_IOCTL(n, func, flags) \
+	DRM_IOCTL_DEF_DRV(PANFROST_##n, panfrost_ioctl_##func, flags)
+
+	PANFROST_IOCTL(SUBMIT,		submit,		DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(WAIT_BO,		wait_bo,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(CREATE_BO,	create_bo,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(MMAP_BO,		mmap_bo,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(GET_PARAM,	get_param,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(GET_BO_OFFSET,	get_bo_offset,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(PERFCNT_ENABLE,	perfcnt_enable,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(PERFCNT_DUMP,	perfcnt_dump,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(MADVISE,		madvise,	DRM_RENDER_ALLOW),
+};
+
+/* File operations referenced by panfrost_drm_driver.fops below */
+DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
+
+/*
+ * Panfrost driver version:
+ * - 1.0 - initial interface
+ * - 1.1 - adds HEAP and NOEXEC flags for CREATE_BO
+ * - 1.2 - adds AFBC_FEATURES query
+ *
+ * The .major/.minor fields below carry the latest entry of this history.
+ */
+static const struct drm_driver panfrost_drm_driver = {
+	.driver_features	= DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
+	.open			= panfrost_open,
+	.postclose		= panfrost_postclose,
+	.ioctls			= panfrost_drm_driver_ioctls,
+	.num_ioctls		= ARRAY_SIZE(panfrost_drm_driver_ioctls),
+	.fops			= &panfrost_drm_driver_fops,
+	.name			= "panfrost",
+	.desc			= "panfrost DRM",
+	.date			= "20180908",
+	.major			= 1,
+	.minor			= 2,
+
+	.gem_create_object	= panfrost_gem_create_object,
+	.gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
+};
+
+/*
+ * Platform probe: allocate the panfrost device (device-managed), pick the
+ * per-compatible data, allocate the DRM device, initialize the GPU, enable
+ * runtime PM with autosuspend, register the DRM device and finally set up
+ * the GEM shrinker.
+ *
+ * Returns 0 on success, -ENOMEM / -ENODEV on early failures, or the error
+ * of the failing init step (the "Fatal error" message is skipped for
+ * -EPROBE_DEFER).
+ */
+static int panfrost_probe(struct platform_device *pdev)
+{
+	struct panfrost_device *pfdev;
+	struct drm_device *ddev;
+	int err;
+
+	pfdev = devm_kzalloc(&pdev->dev, sizeof(*pfdev), GFP_KERNEL);
+	if (!pfdev)
+		return -ENOMEM;
+
+	pfdev->pdev = pdev;
+	pfdev->dev = &pdev->dev;
+
+	platform_set_drvdata(pdev, pfdev);
+
+	/* Per-compatible data from the matching dt_match[] entry */
+	pfdev->comp = of_device_get_match_data(&pdev->dev);
+	if (!pfdev->comp)
+		return -ENODEV;
+
+	pfdev->coherent = device_get_dma_attr(&pdev->dev) == DEV_DMA_COHERENT;
+
+	/* Allocate and initialize the DRM device. */
+	ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev);
+	if (IS_ERR(ddev))
+		return PTR_ERR(ddev);
+
+	ddev->dev_private = pfdev;
+	pfdev->ddev = ddev;
+
+	mutex_init(&pfdev->shrinker_lock);
+	INIT_LIST_HEAD(&pfdev->shrinker_list);
+
+	err = panfrost_device_init(pfdev);
+	if (err) {
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "Fatal error during GPU init\n");
+		goto err_out0;
+	}
+
+	pm_runtime_set_active(pfdev->dev);
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_enable(pfdev->dev);
+	pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */
+	pm_runtime_use_autosuspend(pfdev->dev);
+
+	/*
+	 * Register the DRM device with the core and the connectors with
+	 * sysfs
+	 */
+	err = drm_dev_register(ddev, 0);
+	if (err < 0)
+		goto err_out1;
+
+	panfrost_gem_shrinker_init(ddev);
+
+	return 0;
+
+err_out1:
+	/* Undo runtime PM enabling and the device init */
+	pm_runtime_disable(pfdev->dev);
+	panfrost_device_fini(pfdev);
+	pm_runtime_set_suspended(pfdev->dev);
+err_out0:
+	drm_dev_put(ddev);
+	return err;
+}
+
+/*
+ * Platform remove: unregister the DRM device, clean up the GEM shrinker,
+ * then tear the device down after a pm_runtime_get_sync() and with runtime
+ * PM disabled, and finally drop the DRM device reference.
+ */
+static void panfrost_remove(struct platform_device *pdev)
+{
+	struct panfrost_device *pfdev = platform_get_drvdata(pdev);
+	struct drm_device *ddev = pfdev->ddev;
+
+	drm_dev_unregister(ddev);
+	panfrost_gem_shrinker_cleanup(ddev);
+
+	pm_runtime_get_sync(pfdev->dev);
+	pm_runtime_disable(pfdev->dev);
+	panfrost_device_fini(pfdev);
+	pm_runtime_set_suspended(pfdev->dev);
+
+	drm_dev_put(ddev);
+}
+
+/*
+ * The OPP core wants the supply names to be NULL terminated, but we need the
+ * correct num_supplies value for regulator core. Hence, we NULL terminate here
+ * and then initialize num_supplies with ARRAY_SIZE - 1.
+ */
+static const char * const default_supplies[] = { "mali", NULL };
+/* Data used by the generic "arm,mali-*" compatibles in dt_match[] */
+static const struct panfrost_compatible default_data = {
+	.num_supplies = ARRAY_SIZE(default_supplies) - 1,
+	.supply_names = default_supplies,
+	.num_pm_domains = 1, /* optional */
+	.pm_domain_names = NULL,
+};
+
+/* Same supplies as default_data, plus the Amlogic vendor quirk hook */
+static const struct panfrost_compatible amlogic_data = {
+	.num_supplies = ARRAY_SIZE(default_supplies) - 1,
+	.supply_names = default_supplies,
+	.vendor_quirk = panfrost_gpu_amlogic_quirk,
+};
+
+/*
+ * The old data with two power supplies for MT8183 is here only to
+ * keep retro-compatibility with older devicetrees, as DVFS will
+ * not work with this one.
+ *
+ * On new devicetrees please use the _b variant with a single and
+ * coupled regulators instead.
+ */
+static const char * const mediatek_mt8183_supplies[] = { "mali", "sram", NULL };
+static const char * const mediatek_mt8183_pm_domains[] = { "core0", "core1", "core2" };
+static const struct panfrost_compatible mediatek_mt8183_data = {
+	.num_supplies = ARRAY_SIZE(mediatek_mt8183_supplies) - 1,
+	.supply_names = mediatek_mt8183_supplies,
+	.num_pm_domains = ARRAY_SIZE(mediatek_mt8183_pm_domains),
+	.pm_domain_names = mediatek_mt8183_pm_domains,
+};
+
+/* MT8183 "_b" variant: single "mali" supply, same three power domains */
+static const char * const mediatek_mt8183_b_supplies[] = { "mali", NULL };
+static const struct panfrost_compatible mediatek_mt8183_b_data = {
+	.num_supplies = ARRAY_SIZE(mediatek_mt8183_b_supplies) - 1,
+	.supply_names = mediatek_mt8183_b_supplies,
+	.num_pm_domains = ARRAY_SIZE(mediatek_mt8183_pm_domains),
+	.pm_domain_names = mediatek_mt8183_pm_domains,
+};
+
+/* MT8186: reuses the MT8183 "_b" supplies, with two power domains */
+static const char * const mediatek_mt8186_pm_domains[] = { "core0", "core1" };
+static const struct panfrost_compatible mediatek_mt8186_data = {
+	.num_supplies = ARRAY_SIZE(mediatek_mt8183_b_supplies) - 1,
+	.supply_names = mediatek_mt8183_b_supplies,
+	.num_pm_domains = ARRAY_SIZE(mediatek_mt8186_pm_domains),
+	.pm_domain_names = mediatek_mt8186_pm_domains,
+};
+
+/* MT8192: single "mali" supply and five power domains */
+static const char * const mediatek_mt8192_supplies[] = { "mali", NULL };
+static const char * const mediatek_mt8192_pm_domains[] = { "core0", "core1", "core2",
+							   "core3", "core4" };
+static const struct panfrost_compatible mediatek_mt8192_data = {
+	.num_supplies = ARRAY_SIZE(mediatek_mt8192_supplies) - 1,
+	.supply_names = mediatek_mt8192_supplies,
+	.num_pm_domains = ARRAY_SIZE(mediatek_mt8192_pm_domains),
+	.pm_domain_names = mediatek_mt8192_pm_domains,
+};
+
+/*
+ * Devicetree match table. The .data of the matching entry is what
+ * panfrost_probe() retrieves through of_device_get_match_data().
+ */
+static const struct of_device_id dt_match[] = {
+	/* Set first to probe before the generic compatibles */
+	{ .compatible = "amlogic,meson-gxm-mali",
+	  .data = &amlogic_data, },
+	{ .compatible = "amlogic,meson-g12a-mali",
+	  .data = &amlogic_data, },
+	{ .compatible = "arm,mali-t604", .data = &default_data, },
+	{ .compatible = "arm,mali-t624", .data = &default_data, },
+	{ .compatible = "arm,mali-t628", .data = &default_data, },
+	{ .compatible = "arm,mali-t720", .data = &default_data, },
+	{ .compatible = "arm,mali-t760", .data = &default_data, },
+	{ .compatible = "arm,mali-t820", .data = &default_data, },
+	{ .compatible = "arm,mali-t830", .data = &default_data, },
+	{ .compatible = "arm,mali-t860", .data = &default_data, },
+	{ .compatible = "arm,mali-t880", .data = &default_data, },
+	{ .compatible = "arm,mali-bifrost", .data = &default_data, },
+	{ .compatible = "arm,mali-valhall-jm", .data = &default_data, },
+	{ .compatible = "mediatek,mt8183-mali", .data = &mediatek_mt8183_data },
+	{ .compatible = "mediatek,mt8183b-mali", .data = &mediatek_mt8183_b_data },
+	{ .compatible = "mediatek,mt8186-mali", .data = &mediatek_mt8186_data },
+	{ .compatible = "mediatek,mt8192-mali", .data = &mediatek_mt8192_data },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+/* Platform driver glue: probe/remove callbacks, PM ops and the DT table */
+static struct platform_driver panfrost_driver = {
+	.probe		= panfrost_probe,
+	.remove_new	= panfrost_remove,
+	.driver		= {
+		.name	= "panfrost",
+		.pm	= pm_ptr(&panfrost_pm_ops),
+		.of_match_table = dt_match,
+	},
+};
+module_platform_driver(panfrost_driver);
+
+MODULE_AUTHOR("Panfrost Project Developers");
+MODULE_DESCRIPTION("Panfrost DRM Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.c b/drivers/gpu/drm/panfrost/panfrost_dump.c
new file mode 100644
index 0000000000..e7942ac449
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dump.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2021 Collabora ltd. */
+
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/devcoredump.h>
+#include <linux/moduleparam.h>
+#include <linux/iosys-map.h>
+#include <drm/panfrost_drm.h>
+#include <drm/drm_device.h>
+
+#include "panfrost_job.h"
+#include "panfrost_gem.h"
+#include "panfrost_regs.h"
+#include "panfrost_dump.h"
+#include "panfrost_device.h"
+
+/*
+ * Arms the core dump. panfrost_core_dump() clears it when it takes a dump;
+ * it can be set again through the "dump_core" module parameter (mode 0600).
+ */
+static bool panfrost_dump_core = true;
+module_param_named(dump_core, panfrost_dump_core, bool, 0600);
+
+/* Write cursor used while filling the dump file */
+struct panfrost_dump_iterator {
+	void *start;	/* beginning of the dump buffer */
+	struct panfrost_dump_object_header *hdr;	/* next header to fill */
+	void *data;	/* where the next object's payload starts */
+};
+
+/*
+ * Registers captured in the dump. The job slot (JS_*) and address space
+ * (AS_*) registers are listed for index 0; panfrost_core_dump_registers()
+ * adds the offset of the slot / address space actually being dumped.
+ */
+static const unsigned short panfrost_dump_registers[] = {
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI,
+	JOB_INT_MASK,
+	JOB_INT_STAT,
+	JS_HEAD_LO(0),
+	JS_HEAD_HI(0),
+	JS_TAIL_LO(0),
+	JS_TAIL_HI(0),
+	JS_AFFINITY_LO(0),
+	JS_AFFINITY_HI(0),
+	JS_CONFIG(0),
+	JS_STATUS(0),
+	JS_HEAD_NEXT_LO(0),
+	JS_HEAD_NEXT_HI(0),
+	JS_AFFINITY_NEXT_LO(0),
+	JS_AFFINITY_NEXT_HI(0),
+	JS_CONFIG_NEXT(0),
+	MMU_INT_MASK,
+	MMU_INT_STAT,
+	AS_TRANSTAB_LO(0),
+	AS_TRANSTAB_HI(0),
+	AS_MEMATTR_LO(0),
+	AS_MEMATTR_HI(0),
+	AS_FAULTSTATUS(0),
+	AS_FAULTADDRESS_LO(0),
+	AS_FAULTADDRESS_HI(0),
+	AS_STATUS(0),
+};
+
+/*
+ * Finalize the header of the object whose payload spans from iter->data to
+ * @data_end, then advance the iterator to the next header slot and past the
+ * payload.
+ *
+ * @iter: dump write cursor
+ * @type: object type stored in the header
+ * @data_end: first byte after the object's payload
+ */
+static void panfrost_core_dump_header(struct panfrost_dump_iterator *iter,
+				      u32 type, void *data_end)
+{
+	/* Keep a pointer to the current header: iter->hdr is bumped below */
+	struct panfrost_dump_object_header *hdr = iter->hdr;
+
+	hdr->magic = PANFROSTDUMP_MAGIC;
+	hdr->type = type;
+	hdr->file_offset = iter->data - iter->start;
+	hdr->file_size = data_end - iter->data;
+
+	iter->hdr++;
+	iter->data += hdr->file_size;
+}
+
+/*
+ * Write one { reg, value } record per entry of panfrost_dump_registers[] at
+ * iter->data, then emit the PANFROSTDUMP_BUF_REG header covering them.
+ *
+ * @iter: dump write cursor
+ * @pfdev: device the registers are read from
+ * @as_nr: address space whose AS_* registers are dumped
+ * @slot: job slot whose JS_* registers are dumped
+ */
+static void
+panfrost_core_dump_registers(struct panfrost_dump_iterator *iter,
+			     struct panfrost_device *pfdev,
+			     u32 as_nr, int slot)
+{
+	struct panfrost_dump_registers *dumpreg = iter->data;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(panfrost_dump_registers); i++, dumpreg++) {
+		unsigned int js_as_offset = 0;
+		unsigned int reg;
+
+		/*
+		 * Table entries in the job slot or MMU address space window
+		 * are relocated to the requested slot / address space.
+		 * NOTE(review): both upper bounds are inclusive, so an entry
+		 * exactly one stride past the base would match as well -
+		 * confirm whether "<" was intended.
+		 */
+		if (panfrost_dump_registers[i] >= JS_BASE &&
+		    panfrost_dump_registers[i] <= JS_BASE + JS_SLOT_STRIDE)
+			js_as_offset = slot * JS_SLOT_STRIDE;
+		else if (panfrost_dump_registers[i] >= MMU_BASE &&
+			 panfrost_dump_registers[i] <= MMU_BASE + MMU_AS_STRIDE)
+			js_as_offset = (as_nr << MMU_AS_SHIFT);
+
+		reg = panfrost_dump_registers[i] + js_as_offset;
+
+		dumpreg->reg = reg;
+		dumpreg->value = gpu_read(pfdev, reg);
+	}
+
+	/* dumpreg now points right after the last record written */
+	panfrost_core_dump_header(iter, PANFROSTDUMP_BUF_REG, dumpreg);
+}
+
+/*
+ * Build a devcoredump file describing @job and hand it to dev_coredumpv().
+ *
+ * The file starts with n_obj object headers, followed by the payloads in
+ * this order: the register dump, the bomap (only when the job has BOs), one
+ * object per BO, and an empty trailer object.
+ *
+ * Nothing is done unless panfrost_dump_core is set; the flag is cleared as
+ * soon as a dump is attempted, so only the first event is captured until it
+ * is re-armed.
+ */
+void panfrost_core_dump(struct panfrost_job *job)
+{
+	struct panfrost_device *pfdev = job->pfdev;
+	struct panfrost_dump_iterator iter;
+	struct drm_gem_object *dbo;
+	unsigned int n_obj, n_bomap_pages;
+	u64 *bomap, *bomap_start;
+	size_t file_size;
+	u32 as_nr;
+	int slot;
+	int ret, i;
+
+	as_nr = job->mmu->as;
+	slot = panfrost_job_get_slot(job);
+
+	/* Only catch the first event, or when manually re-armed */
+	if (!panfrost_dump_core)
+		return;
+	panfrost_dump_core = false;
+
+	/* At least, we dump registers and end marker */
+	n_obj = 2;
+	n_bomap_pages = 0;
+	file_size = ARRAY_SIZE(panfrost_dump_registers) *
+			sizeof(struct panfrost_dump_registers);
+
+	/* Add in the active buffer objects */
+	for (i = 0; i < job->bo_count; i++) {
+		/*
+		 * Even though the CPU could be configured to use 16K or 64K pages, this
+		 * is a very unusual situation for most kernel setups on SoCs that have
+		 * a Panfrost device. Also many places across the driver make the somewhat
+		 * arbitrary assumption that Panfrost's MMU page size is the same as the CPU's,
+		 * so let's have a sanity check to ensure that's always the case
+		 */
+		dbo = job->bos[i];
+		WARN_ON(!IS_ALIGNED(dbo->size, PAGE_SIZE));
+
+		file_size += dbo->size;
+		n_bomap_pages += dbo->size >> PAGE_SHIFT;
+		n_obj++;
+	}
+
+	/* If we have any buffer objects, add a bomap object */
+	if (n_bomap_pages) {
+		file_size += n_bomap_pages * sizeof(*bomap);
+		n_obj++;
+	}
+
+	/* Add the size of the headers */
+	file_size += sizeof(*iter.hdr) * n_obj;
+
+	/*
+	 * Allocate the file in vmalloc memory, it's likely to be big.
+	 * The reason behind these GFP flags is that we don't want to trigger the
+	 * OOM killer in the event that not enough memory could be found for our
+	 * dump file. We also don't want the allocator to do any error reporting,
+	 * as the right behaviour is failing gracefully if a big enough buffer
+	 * could not be allocated.
+	 */
+	iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
+			__GFP_NORETRY);
+	if (!iter.start) {
+		dev_warn(pfdev->dev, "failed to allocate devcoredump file\n");
+		return;
+	}
+
+	/* Point the data member after the headers */
+	iter.hdr = iter.start;
+	iter.data = &iter.hdr[n_obj];
+
+	/* Only the header area is zeroed here */
+	memset(iter.hdr, 0, iter.data - iter.start);
+
+	/*
+	 * For now, we write the job identifier in the register dump header,
+	 * so that we can decode the entire dump later with pandecode
+	 */
+	iter.hdr->reghdr.jc = job->jc;
+	iter.hdr->reghdr.major = PANFROSTDUMP_MAJOR;
+	iter.hdr->reghdr.minor = PANFROSTDUMP_MINOR;
+	iter.hdr->reghdr.gpu_id = pfdev->features.id;
+	iter.hdr->reghdr.nbos = job->bo_count;
+
+	panfrost_core_dump_registers(&iter, pfdev, as_nr, slot);
+
+	/*
+	 * Reserve space for the bomap. bomap and bomap_start are only
+	 * assigned here; the BO loop below, which uses them, runs under the
+	 * same job->bo_count condition.
+	 */
+	if (job->bo_count) {
+		bomap_start = bomap = iter.data;
+		memset(bomap, 0, sizeof(*bomap) * n_bomap_pages);
+		panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BOMAP,
+					  bomap + n_bomap_pages);
+	}
+
+	for (i = 0; i < job->bo_count; i++) {
+		struct iosys_map map;
+		struct panfrost_gem_mapping *mapping;
+		struct panfrost_gem_object *bo;
+		struct sg_page_iter page_iter;
+		void *vaddr;
+
+		bo = to_panfrost_bo(job->bos[i]);
+		mapping = job->mappings[i];
+
+		/*
+		 * A BO that cannot be dumped still gets a header (marked
+		 * invalid) and its payload space, see dump_header below.
+		 */
+		if (!bo->base.sgt) {
+			dev_err(pfdev->dev, "Panfrost Dump: BO has no sgt, cannot dump\n");
+			iter.hdr->bomap.valid = 0;
+			goto dump_header;
+		}
+
+		ret = drm_gem_vmap_unlocked(&bo->base.base, &map);
+		if (ret) {
+			dev_err(pfdev->dev, "Panfrost Dump: couldn't map Buffer Object\n");
+			iter.hdr->bomap.valid = 0;
+			goto dump_header;
+		}
+
+		WARN_ON(!mapping->active);
+
+		/* Index, within the bomap, of this BO's first page entry */
+		iter.hdr->bomap.data[0] = bomap - bomap_start;
+
+		/* One bomap entry per page: its physical address, or 0 */
+		for_each_sgtable_page(bo->base.sgt, &page_iter, 0) {
+			struct page *page = sg_page_iter_page(&page_iter);
+
+			if (!IS_ERR(page)) {
+				*bomap++ = page_to_phys(page);
+			} else {
+				dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
+				*bomap++ = 0;
+			}
+		}
+
+		iter.hdr->bomap.iova = mapping->mmnode.start << PAGE_SHIFT;
+
+		vaddr = map.vaddr;
+		memcpy(iter.data, vaddr, bo->base.base.size);
+
+		drm_gem_vunmap_unlocked(&bo->base.base, &map);
+
+		iter.hdr->bomap.valid = 1;
+
+dump_header:	panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BO, iter.data +
+					  bo->base.base.size);
+	}
+	/* Zero-sized trailer object marks the end of the dump */
+	panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_TRAILER, iter.data);
+
+	dev_coredumpv(pfdev->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.h b/drivers/gpu/drm/panfrost/panfrost_dump.h
new file mode 100644
index 0000000000..7d9bcefa53
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dump.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2021 Collabora ltd.
+ */
+
+#ifndef PANFROST_DUMP_H
+#define PANFROST_DUMP_H
+
+/* Forward declaration: only a pointer to the job is needed here */
+struct panfrost_job;
+/* Produce a device core dump for @job */
+void panfrost_core_dump(struct panfrost_job *job);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h b/drivers/gpu/drm/panfrost/panfrost_features.h
new file mode 100644
index 0000000000..7ed0cd3ea2
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_features.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+#ifndef __PANFROST_FEATURES_H__
+#define __PANFROST_FEATURES_H__
+
+#include <linux/bitops.h>
+
+#include "panfrost_device.h"
+
+/*
+ * Hardware feature identifiers. Each value is used as a bit index: the
+ * hw_features_* masks are built with BIT_ULL() of these values and
+ * panfrost_has_hw_feature() tests them with test_bit().
+ */
+enum panfrost_hw_feature {
+	HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	HW_FEATURE_XAFFINITY,
+	HW_FEATURE_V4,
+	HW_FEATURE_FLUSH_REDUCTION,
+	HW_FEATURE_PROTECTED_MODE,
+	HW_FEATURE_COHERENCY_REG,
+	HW_FEATURE_PROTECTED_DEBUG_MODE,
+	HW_FEATURE_AARCH64_MMU,
+	HW_FEATURE_TLS_HASHING,
+	HW_FEATURE_THREAD_GROUP_SPLIT,
+	HW_FEATURE_IDVS_GROUP_SIZE,
+	HW_FEATURE_CLEAN_ONLY_SAFE,
+	HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+};
+
+/*
+ * Per-GPU-model feature masks: each one is the OR of BIT_ULL() of the
+ * enum panfrost_hw_feature values the model supports. A model whose set is
+ * identical to an earlier one is defined as an alias of it.
+ */
+#define hw_features_t600 (\
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_V4))
+
+#define hw_features_t620 hw_features_t600
+
+#define hw_features_t720 hw_features_t600
+
+#define hw_features_t760 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT))
+
+#define hw_features_t860 hw_features_t760
+
+#define hw_features_t880 hw_features_t760
+
+#define hw_features_t830 hw_features_t760
+
+#define hw_features_t820 hw_features_t760
+
+#define hw_features_g71 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG))
+
+#define hw_features_g72 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG))
+
+#define hw_features_g51 hw_features_g72
+
+#define hw_features_g52 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG))
+
+#define hw_features_g76 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG) | \
+	BIT_ULL(HW_FEATURE_AARCH64_MMU) | \
+	BIT_ULL(HW_FEATURE_TLS_HASHING) | \
+	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
+	BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
+
+#define hw_features_g31 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG) | \
+	BIT_ULL(HW_FEATURE_AARCH64_MMU) | \
+	BIT_ULL(HW_FEATURE_TLS_HASHING) | \
+	BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
+
+#define hw_features_g57 (\
+	BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \
+	BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \
+	BIT_ULL(HW_FEATURE_XAFFINITY) | \
+	BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \
+	BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \
+	BIT_ULL(HW_FEATURE_COHERENCY_REG) | \
+	BIT_ULL(HW_FEATURE_AARCH64_MMU) | \
+	BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \
+	BIT_ULL(HW_FEATURE_CLEAN_ONLY_SAFE))
+
+/* Tell whether bit @feat is set in pfdev->features.hw_features */
+static inline bool panfrost_has_hw_feature(struct panfrost_device *pfdev,
+					   enum panfrost_hw_feature feat)
+{
+	return test_bit(feat, pfdev->features.hw_features);
+}
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
new file mode 100644
index 0000000000..3c812fbd12
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+
+#include <drm/panfrost_drm.h>
+#include "panfrost_device.h"
+#include "panfrost_gem.h"
+#include "panfrost_mmu.h"
+
+/* Called by the DRM core on the last userspace/kernel unreference of
+ * the BO.
+ */
+static void panfrost_gem_free_object(struct drm_gem_object *obj)
+{
+	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+	struct panfrost_device *pfdev = obj->dev->dev_private;
+
+	/*
+	 * Make sure the BO is no longer inserted in the shrinker list before
+	 * taking care of the destruction itself. If we don't do that we have a
+	 * race condition between this function and what's done in
+	 * panfrost_gem_shrinker_scan().
+	 */
+	mutex_lock(&pfdev->shrinker_lock);
+	list_del_init(&bo->base.madv_list);
+	mutex_unlock(&pfdev->shrinker_lock);
+
+	/*
+	 * If we still have mappings attached to the BO, there's a problem in
+	 * our refcounting.
+	 */
+	WARN_ON_ONCE(!list_empty(&bo->mappings.list));
+
+	if (bo->sgts) {	/* optional array of sg_tables, NULL when unused */
+		int i;
+		int n_sgt = bo->base.base.size / SZ_2M;	/* walked as one table per 2MB of BO */
+
+		for (i = 0; i < n_sgt; i++) {
+			if (bo->sgts[i].sgl) {	/* skip slots that have no scatterlist */
+				dma_unmap_sgtable(pfdev->dev, &bo->sgts[i],
+						  DMA_BIDIRECTIONAL, 0);
+				sg_free_table(&bo->sgts[i]);
+			}
+		}
+		kvfree(bo->sgts);
+	}
+
+	drm_gem_shmem_free(&bo->base);	/* the shmem helper releases the object itself */
+}
+
+struct panfrost_gem_mapping *
+panfrost_gem_mapping_get(struct panfrost_gem_object *bo,
+			 struct panfrost_file_priv *priv)
+{
+	struct panfrost_gem_mapping *found = NULL, *cur;
+
+	/* Find the mapping of @bo whose mmu is priv->mmu and take a reference. */
+	mutex_lock(&bo->mappings.lock);
+	list_for_each_entry(cur, &bo->mappings.list, node) {
+		if (cur->mmu != priv->mmu)
+			continue;
+		found = cur;
+		kref_get(&found->refcount);
+		break;
+	}
+	mutex_unlock(&bo->mappings.lock);
+	return found;	/* NULL when no mapping matched */
+}
+
+static void
+panfrost_gem_teardown_mapping(struct panfrost_gem_mapping *mapping)
+{
+	if (mapping->active)
+		panfrost_mmu_unmap(mapping);
+
+	spin_lock(&mapping->mmu->mm_lock);	/* same lock that guards node insertion */
+	if (drm_mm_node_allocated(&mapping->mmnode))	/* may be absent or already removed */
+		drm_mm_remove_node(&mapping->mmnode);
+	spin_unlock(&mapping->mmu->mm_lock);
+}
+
+static void panfrost_gem_mapping_release(struct kref *kref)
+{
+	struct panfrost_gem_mapping *m =
+		container_of(kref, struct panfrost_gem_mapping, refcount);
+
+	/* Last reference gone: tear down, drop the BO and MMU refs, then free. */
+	panfrost_gem_teardown_mapping(m);
+	drm_gem_object_put(&m->obj->base.base);
+	panfrost_mmu_ctx_put(m->mmu);
+	kfree(m);
+}
+
+void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping)
+{
+	/* Drop one reference; a NULL @mapping is accepted and ignored. */
+
+	if (mapping)
+		kref_put(&mapping->refcount, panfrost_gem_mapping_release);
+}
+
+void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo)
+{
+	struct panfrost_gem_mapping *cur;	/* list is walked with bo->mappings.lock held */
+
+	list_for_each_entry(cur, &bo->mappings.list, node)
+		panfrost_gem_teardown_mapping(cur);
+}
+
+int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv)
+{
+	int ret;
+	size_t size = obj->size;
+	u64 align;
+	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+	unsigned long color = bo->noexec ? PANFROST_BO_NOEXEC : 0;
+	struct panfrost_file_priv *priv = file_priv->driver_priv;
+	struct panfrost_gem_mapping *mapping;
+
+	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
+	if (!mapping)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&mapping->node);
+	kref_init(&mapping->refcount);
+	drm_gem_object_get(obj);	/* dropped in panfrost_gem_mapping_release() */
+	mapping->obj = bo;
+
+	/*
+	 * Executable buffers cannot cross a 16MB boundary as the program
+	 * counter is 24-bits. We assume executable buffers will be less than
+	 * 16MB and aligning executable buffers to their size will avoid
+	 * crossing a 16MB boundary.
+	 */
+	if (!bo->noexec)
+		align = size >> PAGE_SHIFT;
+	else
+		align = size >= SZ_2M ? SZ_2M >> PAGE_SHIFT : 0;	/* alignment is in pages */
+
+	mapping->mmu = panfrost_mmu_ctx_get(priv->mmu);	/* put in panfrost_gem_mapping_release() */
+	spin_lock(&mapping->mmu->mm_lock);
+	ret = drm_mm_insert_node_generic(&mapping->mmu->mm, &mapping->mmnode,
+					 size >> PAGE_SHIFT, align, color, 0);
+	spin_unlock(&mapping->mmu->mm_lock);
+	if (ret)
+		goto err;
+
+	if (!bo->is_heap) {	/* heap BOs are not mapped by this function */
+		ret = panfrost_mmu_map(mapping);
+		if (ret)
+			goto err;
+	}
+
+	mutex_lock(&bo->mappings.lock);
+	WARN_ON(bo->base.madv != PANFROST_MADV_WILLNEED);
+	list_add_tail(&mapping->node, &bo->mappings.list);
+	mutex_unlock(&bo->mappings.lock);
+
+err:	/* also reached on success with ret == 0, in which case nothing is undone */
+	if (ret)
+		panfrost_gem_mapping_put(mapping);
+	return ret;
+}
+
+void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv)
+{
+	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+	struct panfrost_file_priv *priv = file_priv->driver_priv;
+	struct panfrost_gem_mapping *victim = NULL, *cur;
+
+	/* Unlink the mapping whose mmu is priv->mmu from the BO, if there is one. */
+	mutex_lock(&bo->mappings.lock);
+	list_for_each_entry(cur, &bo->mappings.list, node) {
+		if (cur->mmu != priv->mmu)
+			continue;
+		list_del(&cur->node);
+		victim = cur;
+		break;
+	}
+	mutex_unlock(&bo->mappings.lock);
+	panfrost_gem_mapping_put(victim);	/* NULL (no match) is ignored by the put */
+}
+
+static int panfrost_gem_pin(struct drm_gem_object *obj)
+{
+	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+
+	/* Heap BOs are refused with -EINVAL; all others go to the shmem helper. */
+	if (!bo->is_heap)
+		return drm_gem_shmem_pin(&bo->base);
+	return -EINVAL;
+}
+
+static const struct drm_gem_object_funcs panfrost_gem_funcs = {
+	.free = panfrost_gem_free_object,
+	.open = panfrost_gem_open,	/* creates the per-file mapping */
+	.close = panfrost_gem_close,	/* unlinks and puts that mapping */
+	.print_info = drm_gem_shmem_object_print_info,
+	.pin = panfrost_gem_pin,	/* shmem pin, except heap BOs get -EINVAL */
+	.unpin = drm_gem_shmem_object_unpin,
+	.get_sg_table = drm_gem_shmem_object_get_sg_table,
+	.vmap = drm_gem_shmem_object_vmap,
+	.vunmap = drm_gem_shmem_object_vunmap,
+	.mmap = drm_gem_shmem_object_mmap,
+	.vm_ops = &drm_gem_shmem_vm_ops,
+};
+
+/**
+ * panfrost_gem_create_object - Implementation of driver->gem_create_object.
+ * @dev: DRM device
+ * @size: Size in bytes of the memory the object will reference (unused here)
+ *
+ * Allocates a zeroed panfrost BO, initialises its mapping list and lock,
+ * installs panfrost_gem_funcs and sets map_wc unless the device is coherent.
+ * Returns the embedded GEM object, or ERR_PTR(-ENOMEM) on allocation failure.
+ */
+struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_gem_object *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	bo->base.base.funcs = &panfrost_gem_funcs;
+	bo->base.map_wc = !pfdev->coherent;
+	mutex_init(&bo->mappings.lock);
+	INIT_LIST_HEAD(&bo->mappings.list);
+
+	return &bo->base.base;
+}
+
+struct panfrost_gem_object *
+panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags)
+{
+	const bool heap = !!(flags & PANFROST_BO_HEAP);
+	struct drm_gem_shmem_object *shmem;
+	struct panfrost_gem_object *bo;
+
+	/* Heap BOs are sized in whole 2MB units, which keeps fault handling simple */
+	if (heap)
+		size = roundup(size, SZ_2M);
+
+	shmem = drm_gem_shmem_create(dev, size);
+	if (IS_ERR(shmem))
+		return ERR_CAST(shmem);
+
+	bo = to_panfrost_bo(&shmem->base);
+	bo->is_heap = heap;
+	bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
+	return bo;
+}
+
+struct drm_gem_object *
+panfrost_gem_prime_import_sg_table(struct drm_device *dev,
+				   struct dma_buf_attachment *attach,
+				   struct sg_table *sgt)
+{
+	struct drm_gem_object *imported;
+
+	imported = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+	if (IS_ERR(imported))
+		return ERR_CAST(imported);
+
+	/*
+	 * Every successfully imported buffer is flagged non-executable.
+	 */
+	to_panfrost_bo(imported)->noexec = true;
+	return imported;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
new file mode 100644
index 0000000000..ad2877eeec
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#ifndef __PANFROST_GEM_H__
+#define __PANFROST_GEM_H__
+
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_mm.h>
+
+struct panfrost_mmu;
+
+struct panfrost_gem_object {
+	struct drm_gem_shmem_object base;	/* to_panfrost_bo() relies on this member name */
+	struct sg_table *sgts;	/* optional array, freed per 2MB chunk in panfrost_gem_free_object() */
+
+	/*
+	 * Use a list for now. If searching a mapping ever becomes the
+	 * bottleneck, we should consider using an RB-tree, or even better,
+	 * let the core store drm_gem_object_mapping entries (where we
+	 * could place driver specific data) instead of drm_gem_object ones
+	 * in its drm_file->object_idr table.
+	 *
+	 * struct drm_gem_object_mapping {
+	 *	struct drm_gem_object *obj;
+	 *	void *driver_priv;
+	 * };
+	 */
+	struct {
+		struct list_head list;	/* of panfrost_gem_mapping.node */
+		struct mutex lock;	/* protects @list */
+	} mappings;
+
+	/*
+	 * Count the number of jobs referencing this BO so we don't let the
+	 * shrinker reclaim this object prematurely.
+	 */
+	atomic_t gpu_usecount;
+
+	bool noexec		:1;	/* from PANFROST_BO_NOEXEC; forced on for PRIME imports */
+	bool is_heap		:1;	/* from PANFROST_BO_HEAP */
+};
+
+struct panfrost_gem_mapping {
+	struct list_head node;	/* entry in panfrost_gem_object.mappings.list */
+	struct kref refcount;	/* last put runs panfrost_gem_mapping_release() */
+	struct panfrost_gem_object *obj;	/* BO; a GEM reference is held on it */
+	struct drm_mm_node mmnode;	/* range inserted into mmu->mm */
+	struct panfrost_mmu *mmu;	/* obtained via panfrost_mmu_ctx_get() */
+	bool active		:1;	/* checked before calling panfrost_mmu_unmap() */
+};
+
+static inline struct panfrost_gem_object *	/* BO that embeds @obj's shmem object as ->base */
+to_panfrost_bo(struct drm_gem_object *obj)
+{
+	return container_of(to_drm_gem_shmem_obj(obj), struct panfrost_gem_object, base);
+}
+
+static inline struct panfrost_gem_mapping *	/* mapping that embeds @node as ->mmnode */
+drm_mm_node_to_panfrost_mapping(struct drm_mm_node *node)
+{
+	return container_of(node, struct panfrost_gem_mapping, mmnode);
+}
+
+struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size);
+
+struct drm_gem_object *
+panfrost_gem_prime_import_sg_table(struct drm_device *dev,
+				   struct dma_buf_attachment *attach,
+				   struct sg_table *sgt);	/* result is marked noexec */
+
+struct panfrost_gem_object *
+panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags);	/* flags: PANFROST_BO_* */
+
+int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv);
+void panfrost_gem_close(struct drm_gem_object *obj,
+			struct drm_file *file_priv);
+
+struct panfrost_gem_mapping *
+panfrost_gem_mapping_get(struct panfrost_gem_object *bo,
+			 struct panfrost_file_priv *priv);	/* NULL if no mapping matches */
+void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping);	/* NULL is ignored */
+void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo);
+
+void panfrost_gem_shrinker_init(struct drm_device *dev);
+void panfrost_gem_shrinker_cleanup(struct drm_device *dev);
+
+#endif /* __PANFROST_GEM_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
new file mode 100644
index 0000000000..6a71a2555f
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019 Arm Ltd.
+ *
+ * Based on msm_gem_freedreno.c:
+ * Copyright (C) 2016 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ */
+
+#include <linux/list.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
+
+#include "panfrost_device.h"
+#include "panfrost_gem.h"
+#include "panfrost_mmu.h"
+
+static unsigned long
+panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct panfrost_device *pfdev =
+		container_of(shrinker, struct panfrost_device, shrinker);
+	unsigned long nr_pages = 0;
+	struct drm_gem_shmem_object *cur;
+
+	/* Report nothing rather than wait when the list lock is contended. */
+	if (!mutex_trylock(&pfdev->shrinker_lock))
+		return nr_pages;
+
+	/* Sum the size, in pages, of every purgeable BO on the list. */
+	list_for_each_entry(cur, &pfdev->shrinker_list, madv_list)
+		if (drm_gem_shmem_is_purgeable(cur))
+			nr_pages += cur->base.size >> PAGE_SHIFT;
+
+	mutex_unlock(&pfdev->shrinker_lock);
+	return nr_pages;
+}
+
+static bool panfrost_gem_purge(struct drm_gem_object *obj)
+{
+	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+	bool ret = false;
+
+	if (atomic_read(&bo->gpu_usecount))	/* still referenced by jobs */
+		return false;
+
+	if (!mutex_trylock(&bo->mappings.lock))	/* trylock only: never wait here */
+		return false;
+
+	if (!dma_resv_trylock(shmem->base.resv))
+		goto unlock_mappings;
+
+	panfrost_gem_teardown_mappings_locked(bo);
+	drm_gem_shmem_purge(&bo->base);
+	ret = true;
+
+	dma_resv_unlock(shmem->base.resv);
+
+unlock_mappings:
+	mutex_unlock(&bo->mappings.lock);
+	return ret;	/* true only when the purge was carried out */
+}
+
+static unsigned long
+panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct panfrost_device *pfdev =
+		container_of(shrinker, struct panfrost_device, shrinker);
+	struct drm_gem_shmem_object *cur, *next;
+	unsigned long freed = 0;	/* pages purged so far */
+
+	if (!mutex_trylock(&pfdev->shrinker_lock))
+		return SHRINK_STOP;
+
+	list_for_each_entry_safe(cur, next, &pfdev->shrinker_list, madv_list) {
+		if (freed >= sc->nr_to_scan)
+			break;
+		if (!drm_gem_shmem_is_purgeable(cur))
+			continue;
+		if (!panfrost_gem_purge(&cur->base))
+			continue;
+		list_del_init(&cur->madv_list);	/* purged BOs leave the shrinker list */
+		freed += cur->base.size >> PAGE_SHIFT;
+	}
+	mutex_unlock(&pfdev->shrinker_lock);
+
+	if (freed)
+		pr_info_ratelimited("Purging %lu bytes\n", freed << PAGE_SHIFT);
+
+	return freed;
+}
+
+/**
+ * panfrost_gem_shrinker_init - Initialize panfrost shrinker
+ * @dev: DRM device
+ *
+ * Sets the count/scan callbacks and registers the shrinker as "drm-panfrost".
+ */
+void panfrost_gem_shrinker_init(struct drm_device *dev)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	pfdev->shrinker.count_objects = panfrost_gem_shrinker_count;
+	pfdev->shrinker.scan_objects = panfrost_gem_shrinker_scan;
+	pfdev->shrinker.seeks = DEFAULT_SEEKS;
+	WARN_ON(register_shrinker(&pfdev->shrinker, "drm-panfrost"));	/* failure only warns */
+}
+
+/**
+ * panfrost_gem_shrinker_cleanup - Clean up panfrost shrinker
+ * @dev: DRM device
+ *
+ * Unregisters the panfrost shrinker, provided shrinker.nr_deferred is set.
+ */
+void panfrost_gem_shrinker_cleanup(struct drm_device *dev)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+
+	/* NOTE(review): nr_deferred presumably marks a registered shrinker. */
+	if (pfdev->shrinker.nr_deferred)
+		unregister_shrinker(&pfdev->shrinker);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
new file mode 100644
index 0000000000..eca45b83e4
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "panfrost_device.h"
+#include "panfrost_features.h"
+#include "panfrost_issues.h"
+#include "panfrost_gpu.h"
+#include "panfrost_perfcnt.h"
+#include "panfrost_regs.h"
+
+static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data)
+{
+	struct panfrost_device *pfdev = data;
+	u32 state = gpu_read(pfdev, GPU_INT_STAT);
+	u32 fault_status = gpu_read(pfdev, GPU_FAULT_STATUS);
+
+	if (!state)	/* line is requested IRQF_SHARED: not ours when no bit is set */
+		return IRQ_NONE;
+
+	if (state & GPU_IRQ_MASK_ERROR) {
+		u64 address = (u64) gpu_read(pfdev, GPU_FAULT_ADDRESS_HI) << 32;
+		address |= gpu_read(pfdev, GPU_FAULT_ADDRESS_LO);
+
+		dev_warn(pfdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx\n",
+			 fault_status, panfrost_exception_name(fault_status & 0xFF),
+			 address);
+
+		if (state & GPU_IRQ_MULTIPLE_FAULT)
+			dev_warn(pfdev->dev, "There were multiple GPU faults - some have not been reported\n");
+
+		gpu_write(pfdev, GPU_INT_MASK, 0);	/* a fault zeroes the GPU interrupt mask */
+	}
+
+	if (state & GPU_IRQ_PERFCNT_SAMPLE_COMPLETED)
+		panfrost_perfcnt_sample_done(pfdev);
+
+	if (state & GPU_IRQ_CLEAN_CACHES_COMPLETED)
+		panfrost_perfcnt_clean_cache_done(pfdev);
+
+	gpu_write(pfdev, GPU_INT_CLEAR, state);	/* clear exactly the bits read above */
+
+	return IRQ_HANDLED;
+}
+
+int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
+{
+	int ret;
+	u32 val;
+
+	gpu_write(pfdev, GPU_INT_MASK, 0);	/* completion is polled below, not interrupt-driven */
+	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED);
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET);
+
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT,
+		val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000);	/* 100us step, 10ms limit */
+
+	if (ret) {
+		dev_err(pfdev->dev, "gpu soft reset timed out\n");
+		return ret;	/* interrupts stay masked on this path */
+	}
+
+	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL);
+
+	/* Only enable the interrupts we care about */
+	gpu_write(pfdev, GPU_INT_MASK,
+		  GPU_IRQ_MASK_ERROR |
+		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED |
+		  GPU_IRQ_CLEAN_CACHES_COMPLETED);
+
+	return 0;
+}
+
+void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev)
+{
+	/*
+	 * The Amlogic integrated Mali-T820, Mali-G31 & Mali-G52 need
+	 * these undocumented bits in GPU_PWR_OVERRIDE1 to be set in order
+	 * to operate correctly.
+	 */
+	gpu_write(pfdev, GPU_PWR_KEY, GPU_PWR_KEY_UNLOCK);	/* key is written first */
+	gpu_write(pfdev, GPU_PWR_OVERRIDE1, 0xfff | (0x20 << 16));
+}
+
+static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev)
+{
+	u32 quirks = 0;	/* reused for the shader, tiler and JM config words in turn */
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8443) ||
+	    panfrost_has_hw_issue(pfdev, HW_ISSUE_11035))
+		quirks |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10327))
+		quirks |= SC_SDC_DISABLE_OQ_DISCARD;
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10797))
+		quirks |= SC_ENABLE_TEXGRD_FLAGS;
+
+	if (!panfrost_has_hw_issue(pfdev, GPUCORE_1619)) {
+		if (panfrost_model_cmp(pfdev, 0x750) < 0) /* T60x, T62x, T72x */
+			quirks |= SC_LS_ATTR_CHECK_DISABLE;
+		else if (panfrost_model_cmp(pfdev, 0x880) <= 0) /* T76x, T8xx */
+			quirks |= SC_LS_ALLOW_ATTR_TYPES;
+	}
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_TTRX_2968_TTRX_3162))
+		quirks |= SC_VAR_ALGORITHM;
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_TLS_HASHING))
+		quirks |= SC_TLS_HASH_ENABLE;
+
+	if (quirks)
+		gpu_write(pfdev, GPU_SHADER_CONFIG, quirks);
+
+	/* Tiler config is a read-modify-write and is always written back. */
+	quirks = gpu_read(pfdev, GPU_TILER_CONFIG);
+
+	/* Set tiler clock gate override if required */
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_T76X_3953))
+		quirks |= TC_CLOCK_GATE_OVERRIDE;
+
+	gpu_write(pfdev, GPU_TILER_CONFIG, quirks);
+
+	/* Job manager config is built from zero and written only if non-zero. */
+	quirks = 0;
+	if ((panfrost_model_eq(pfdev, 0x860) || panfrost_model_eq(pfdev, 0x880)) &&
+	    pfdev->features.revision >= 0x2000)
+		quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT;
+	else if (panfrost_model_eq(pfdev, 0x6000) &&
+		 pfdev->features.coherency_features == COHERENCY_ACE)
+		quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+			   JM_FORCE_COHERENCY_FEATURES_SHIFT;
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE))
+		quirks |= JM_DEFAULT_IDVS_GROUP_SIZE << JM_IDVS_GROUP_SIZE_SHIFT;
+
+	if (quirks)
+		gpu_write(pfdev, GPU_JM_CONFIG, quirks);
+
+	/* Platform-specific quirks go here */
+	if (pfdev->comp->vendor_quirk)
+		pfdev->comp->vendor_quirk(pfdev);
+}
+
+#define MAX_HW_REVS 6	/* capacity of panfrost_model.revs[] */
+
+struct panfrost_model {
+	const char *name;	/* printed as "mali-%s" by panfrost_gpu_init_features() */
+	u32 id;	/* matched with panfrost_model_eq() */
+	u32 id_mask;	/* not initialised by GPU_MODEL() */
+	u64 features;	/* hw_features_<name> bitmask */
+	u64 issues;	/* hw_issues_<name> bitmask */
+	struct {
+		u32 revision;	/* (major << 12) | (minor << 4) | status */
+		u64 issues;	/* extra issue bits for that revision */
+	} revs[MAX_HW_REVS];
+};
+
+#define GPU_MODEL(_name, _id, ...) /* variadic tail: GPU_REV()/GPU_REV_EXT() entries */ \
+{\
+	.name = __stringify(_name),				\
+	.id = _id,						\
+	.features = hw_features_##_name,			\
+	.issues = hw_issues_##_name,				\
+	.revs = { __VA_ARGS__ },				\
+}
+
+#define GPU_REV_EXT(name, _rev, _p, _s, stat) /* packed like the low 16 bits of GPU_ID */ \
+{\
+	.revision = (_rev) << 12 | (_p) << 4 | (_s),		\
+	.issues = hw_issues_##name##_r##_rev##p##_p##stat,	\
+}
+#define GPU_REV(name, r, p) GPU_REV_EXT(name, r, p, 0, )	/* status 0, empty suffix */
+
+static const struct panfrost_model gpu_models[] = {	/* note: no terminating sentinel entry */
+	/* T60x has an oddball version */
+	GPU_MODEL(t600, 0x600,
+		GPU_REV_EXT(t600, 0, 0, 1, _15dev0)),
+	GPU_MODEL(t620, 0x620,
+		GPU_REV(t620, 0, 1), GPU_REV(t620, 1, 0)),
+	GPU_MODEL(t720, 0x720),
+	GPU_MODEL(t760, 0x750,
+		GPU_REV(t760, 0, 0), GPU_REV(t760, 0, 1),
+		GPU_REV_EXT(t760, 0, 1, 0, _50rel0),
+		GPU_REV(t760, 0, 2), GPU_REV(t760, 0, 3)),
+	GPU_MODEL(t820, 0x820),
+	GPU_MODEL(t830, 0x830),
+	GPU_MODEL(t860, 0x860),
+	GPU_MODEL(t880, 0x880),
+
+	GPU_MODEL(g71, 0x6000,
+		GPU_REV_EXT(g71, 0, 0, 1, _05dev0)),
+	GPU_MODEL(g72, 0x6001),
+	GPU_MODEL(g51, 0x7000),
+	GPU_MODEL(g76, 0x7001),
+	GPU_MODEL(g52, 0x7002),
+	GPU_MODEL(g31, 0x7003,
+		GPU_REV(g31, 1, 0)),
+
+	GPU_MODEL(g57, 0x9001,
+		GPU_REV(g57, 0, 0)),
+
+	/* MediaTek MT8192 has a Mali-G57 with a different GPU ID from the
+	 * standard. Arm's driver does not appear to handle this model.
+	 * ChromeOS has a hack downstream for it. Treat it as equivalent to
+	 * standard Mali-G57 for now.
+	 */
+	GPU_MODEL(g57, 0x9003,
+		GPU_REV(g57, 0, 0)),
+};
+
+static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
+{
+	u32 gpu_id, num_js, major, minor, status, rev;
+	const char *name = "unknown";	/* kept when no gpu_models entry matches */
+	u64 hw_feat = 0;
+	u64 hw_issues = hw_issues_all;	/* applied to every GPU, known or not */
+	const struct panfrost_model *model;
+	int i;
+
+	pfdev->features.l2_features = gpu_read(pfdev, GPU_L2_FEATURES);
+	pfdev->features.core_features = gpu_read(pfdev, GPU_CORE_FEATURES);
+	pfdev->features.tiler_features = gpu_read(pfdev, GPU_TILER_FEATURES);
+	pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES);
+	pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES);
+	pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES);
+	pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
+	pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
+	pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE);
+	pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
+	pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES);
+	for (i = 0; i < 4; i++)
+		pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i));
+
+	pfdev->features.as_present = gpu_read(pfdev, GPU_AS_PRESENT);
+
+	pfdev->features.js_present = gpu_read(pfdev, GPU_JS_PRESENT);
+	num_js = hweight32(pfdev->features.js_present);
+	for (i = 0; i < num_js; i++)
+		pfdev->features.js_features[i] = gpu_read(pfdev, GPU_JS_FEATURES(i));
+
+	pfdev->features.shader_present = gpu_read(pfdev, GPU_SHADER_PRESENT_LO);
+	pfdev->features.shader_present |= (u64)gpu_read(pfdev, GPU_SHADER_PRESENT_HI) << 32;
+
+	pfdev->features.tiler_present = gpu_read(pfdev, GPU_TILER_PRESENT_LO);
+	pfdev->features.tiler_present |= (u64)gpu_read(pfdev, GPU_TILER_PRESENT_HI) << 32;
+
+	pfdev->features.l2_present = gpu_read(pfdev, GPU_L2_PRESENT_LO);
+	pfdev->features.l2_present |= (u64)gpu_read(pfdev, GPU_L2_PRESENT_HI) << 32;
+	pfdev->features.nr_core_groups = hweight64(pfdev->features.l2_present);
+
+	pfdev->features.stack_present = gpu_read(pfdev, GPU_STACK_PRESENT_LO);
+	pfdev->features.stack_present |= (u64)gpu_read(pfdev, GPU_STACK_PRESENT_HI) << 32;
+
+	pfdev->features.thread_tls_alloc = gpu_read(pfdev, GPU_THREAD_TLS_ALLOC);
+
+	gpu_id = gpu_read(pfdev, GPU_ID);
+	pfdev->features.revision = gpu_id & 0xffff;
+	pfdev->features.id = gpu_id >> 16;
+
+	/* The T60x has an oddball ID value. Fix it up to the standard Midgard
+	 * format so we (and userspace) don't have to special case it.
+	 */
+	if (pfdev->features.id == 0x6956)
+		pfdev->features.id = 0x0600;
+
+	major = (pfdev->features.revision >> 12) & 0xf;
+	minor = (pfdev->features.revision >> 4) & 0xff;
+	status = pfdev->features.revision & 0xf;
+	rev = pfdev->features.revision;
+
+	gpu_id = pfdev->features.id;
+	/* gpu_models[] has no sentinel entry: bound the walk by its size. */
+	for (model = gpu_models; model < gpu_models + ARRAY_SIZE(gpu_models); model++) {
+		int best = -1;
+
+		if (!panfrost_model_eq(pfdev, model->id))
+			continue;
+
+		name = model->name;
+		hw_feat = model->features;
+		hw_issues |= model->issues;
+		for (i = 0; i < MAX_HW_REVS; i++) {
+			if (model->revs[i].revision == rev) {
+				best = i;	/* exact revision match wins */
+				break;
+			} else if (model->revs[i].revision == (rev & ~0xf))
+				best = i;	/* match ignoring the status nibble; keep looking */
+		}
+
+		if (best >= 0)
+			hw_issues |= model->revs[best].issues;
+
+		break;
+	}
+
+	bitmap_from_u64(pfdev->features.hw_features, hw_feat);
+	bitmap_from_u64(pfdev->features.hw_issues, hw_issues);
+
+	dev_info(pfdev->dev, "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x",
+		 name, gpu_id, major, minor, status);
+	dev_info(pfdev->dev, "features: %64pb, issues: %64pb",
+		 pfdev->features.hw_features,
+		 pfdev->features.hw_issues);
+
+	dev_info(pfdev->dev, "Features: L2:0x%08x Shader:0x%08x Tiler:0x%08x Mem:0x%0x MMU:0x%08x AS:0x%x JS:0x%x",
+		 pfdev->features.l2_features,
+		 pfdev->features.core_features,
+		 pfdev->features.tiler_features,
+		 pfdev->features.mem_features,
+		 pfdev->features.mmu_features,
+		 pfdev->features.as_present,
+		 pfdev->features.js_present);
+
+	dev_info(pfdev->dev, "shader_present=0x%0llx l2_present=0x%0llx",
+		 pfdev->features.shader_present, pfdev->features.l2_present);
+}
+
+static u64 panfrost_get_core_mask(struct panfrost_device *pfdev)
+{
+	const u64 l2 = pfdev->features.l2_present;
+	u64 first_group;
+
+	if (l2 == 1)
+		return U64_MAX;
+
+	/*
+	 * Only the first core group is supported for now. With the lowest
+	 * bit of l2_present set, ~(l2 - 1) & (l2 - 2) leaves exactly the
+	 * bits below the next set bit of l2_present, i.e. the mask of the
+	 * cores in the first core group (l2_present = 0x11 gives 0x0f).
+	 */
+	first_group = ~(l2 - 1) & (l2 - 2);
+
+	dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n",
+		      hweight64(first_group),
+		      hweight64(pfdev->features.shader_present));
+
+	return first_group;
+}
+
+void panfrost_gpu_power_on(struct panfrost_device *pfdev)
+{
+	int ret;
+	u32 val;
+	u64 core_mask;
+
+	panfrost_gpu_init_quirks(pfdev);	/* quirk registers are written before any power-up */
+	core_mask = panfrost_get_core_mask(pfdev);
+
+	gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present & core_mask);
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO,
+		val, val == (pfdev->features.l2_present & core_mask),
+		100, 20000);
+	if (ret)	/* logged only: the remaining domains are still attempted */
+		dev_err(pfdev->dev, "error powering up gpu L2");
+
+	gpu_write(pfdev, SHADER_PWRON_LO,
+		  pfdev->features.shader_present & core_mask);
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO,
+		val, val == (pfdev->features.shader_present & core_mask),
+		100, 20000);
+	if (ret)
+		dev_err(pfdev->dev, "error powering up gpu shader");
+
+	gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present);	/* core_mask not applied */
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO,
+		val, val == pfdev->features.tiler_present, 100, 1000);
+	if (ret)
+		dev_err(pfdev->dev, "error powering up gpu tiler");
+}
+
+void panfrost_gpu_power_off(struct panfrost_device *pfdev)
+{
+	int ret;
+	u32 val;
+
+	gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);	/* no core_mask here */
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
+					 val, !val, 1, 1000);
+	if (ret)	/* timeouts are logged; the sequence continues regardless */
+		dev_err(pfdev->dev, "shader power transition timeout");
+
+	gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
+	ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
+					 val, !val, 1, 1000);
+	if (ret)
+		dev_err(pfdev->dev, "tiler power transition timeout");
+
+	gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
+	ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
+				 val, !val, 0, 1000);	/* sleep_us of 0, unlike the polls above */
+	if (ret)
+		dev_err(pfdev->dev, "l2 power transition timeout");
+}
+
+int panfrost_gpu_init(struct panfrost_device *pfdev)
+{
+	int err, irq;
+
+	err = panfrost_gpu_soft_reset(pfdev);
+	if (err)
+		return err;
+
+	panfrost_gpu_init_features(pfdev);
+	/* Bits 15:8 of the MMU features word are used as the DMA mask width. */
+	err = dma_set_mask_and_coherent(pfdev->dev,
+		DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features)));
+	if (err)
+		return err;
+
+	dma_set_max_seg_size(pfdev->dev, UINT_MAX);
+	/* Return the lookup error unchanged so -EPROBE_DEFER reaches the caller. */
+	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu");
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler,
+			       IRQF_SHARED, KBUILD_MODNAME "-gpu", pfdev);
+	if (err) {
+		dev_err(pfdev->dev, "failed to request gpu irq");
+		return err;
+	}
+
+	panfrost_gpu_power_on(pfdev);	/* returns void: power-up failures are only logged */
+
+	return 0;
+}
+
+void panfrost_gpu_fini(struct panfrost_device *pfdev)
+{
+	panfrost_gpu_power_off(pfdev);	/* the IRQ was requested with devm_, so it is not freed here */
+}
+
+u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev)
+{
+	u32 latest = 0;
+
+	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
+		return 0;
+
+	/* Flush reduction only makes sense when the GPU is kept powered on between jobs */
+	if (pm_runtime_get_if_in_use(pfdev->dev)) {
+		latest = gpu_read(pfdev, GPU_LATEST_FLUSH_ID);
+		pm_runtime_put(pfdev->dev);
+	}
+
+	return latest;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h
new file mode 100644
index 0000000000..468c51e7e4
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANFROST_GPU_H__
+#define __PANFROST_GPU_H__
+
+struct panfrost_device;
+
+int panfrost_gpu_init(struct panfrost_device *pfdev);
+void panfrost_gpu_fini(struct panfrost_device *pfdev);	/* powers the GPU off */
+
+u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev);	/* 0 when not read */
+
+int panfrost_gpu_soft_reset(struct panfrost_device *pfdev);	/* 0, or the poll error */
+void panfrost_gpu_power_on(struct panfrost_device *pfdev);
+void panfrost_gpu_power_off(struct panfrost_device *pfdev);
+
+void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_issues.h b/drivers/gpu/drm/panfrost/panfrost_issues.h
new file mode 100644
index 0000000000..eb60cb8366
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_issues.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+#ifndef __PANFROST_ISSUES_H__
+#define __PANFROST_ISSUES_H__
+
+#include <linux/bitops.h>
+
+#include "panfrost_device.h"
+
+/*
+ * This is not a complete list of issues, but only the ones the driver needs
+ * to care about.
+ */
+enum panfrost_hw_issue {
+	/* Need way to guarantee that all previously-translated memory accesses
+	 * are committed */
+	HW_ISSUE_6367,
+
+	/* On job complete with non-done the cache is not flushed */
+	HW_ISSUE_6787,
+
+	/* Write of PRFCNT_CONFIG_MODE_MANUAL to PRFCNT_CONFIG causes an
+	 * instrumentation dump if PRFCNT_TILER_EN is enabled */
+	HW_ISSUE_8186,
+
+	/* TIB: Reports faults from a vtile which has not yet been allocated */
+	HW_ISSUE_8245,
+
+	/* uTLB deadlock could occur when writing to an invalid page at the
+	 * same time as access to a valid page in the same uTLB cache line ( ==
+	 * 4 PTEs == 16K block of mapping) */
+	HW_ISSUE_8316,
+
+	/* HT: TERMINATE for RUN command ignored if previous LOAD_DESCRIPTOR is
+	 * still executing */
+	HW_ISSUE_8394,
+
+	/* CSE: Sends a TERMINATED response for a task that should not be
+	 * terminated */
+	HW_ISSUE_8401,
+
+	/* Repeatedly soft-stopping a job chain consisting of (Vertex Shader,
+	 * Cache Flush, Tiler) jobs causes DATA_INVALID_FAULT on tiler job. */
+	HW_ISSUE_8408,
+
+	/* Disable the Pause Buffer in the LS pipe. */
+	HW_ISSUE_8443,
+
+	/* Change in RMUs in use causes problems related to the core's SDC */
+	HW_ISSUE_8987,
+
+	/* Compute endpoint has a 4-deep queue of tasks, meaning a soft stop
+	 * won't complete until all 4 tasks have completed */
+	HW_ISSUE_9435,
+
+	/* HT: Tiler returns TERMINATED for non-terminated command */
+	HW_ISSUE_9510,
+
+	/* Occasionally the GPU will issue multiple page faults for the same
+	 * address before the MMU page table has been read by the GPU */
+	HW_ISSUE_9630,
+
+	/* RA DCD load request to SDC returns invalid load ignore causing
+	 * colour buffer mismatch */
+	HW_ISSUE_10327,
+
+	/* MMU TLB invalidation hazards */
+	HW_ISSUE_10649,
+
+	/* Missing cache flush in multi core-group configuration */
+	HW_ISSUE_10676,
+
+	/* Chicken bit on T72X for a hardware workaround in compiler */
+	HW_ISSUE_10797,
+
+	/* Soft-stopping fragment jobs might fail with TILE_RANGE_FAULT */
+	HW_ISSUE_10817,
+
+	/* Intermittent missing interrupt on job completion */
+	HW_ISSUE_10883,
+
+	/* Soft-stopping fragment jobs might fail with TILE_RANGE_ERROR
+	 * (similar to issue 10817) and can use #10817 workaround */
+	HW_ISSUE_10959,
+
+	/* Soft-stopped fragment shader job can restart with out-of-bound
+	 * restart index */
+	HW_ISSUE_10969,
+
+	/* Race condition can cause tile list corruption */
+	HW_ISSUE_11020,
+
+	/* Write buffer can cause tile list corruption */
+	HW_ISSUE_11024,
+
+	/* Pause buffer can cause a fragment job hang */
+	HW_ISSUE_11035,
+
+	/* Dynamic Core Scaling not supported due to errata */
+	HW_ISSUE_11056,
+
+	/* Clear encoder state for a hard stopped fragment job which is AFBC
+	 * encoded by soft resetting the GPU. Only for T76X r0p0, r0p1 and
+	 * r0p1_50rel0 */
+	HW_ISSUE_T76X_3542,
+
+	/* Keep tiler module clock on to prevent GPU stall */
+	HW_ISSUE_T76X_3953,
+
+	/* Must ensure L2 is not transitioning when we reset. Workaround with a
+	 * busy wait until L2 completes its transition; ensure there is a
+	 * maximum loop count as it may never complete. (On chips without this
+	 * erratum, it's totally okay if L2 transitions.) */
+	HW_ISSUE_TMIX_8463,
+
+	/* Don't set SC_LS_ATTR_CHECK_DISABLE/SC_LS_ALLOW_ATTR_TYPES */
+	GPUCORE_1619,
+
+	/* When a hard-stop follows close after a soft-stop, the completion
+	 * code for the terminated job may be incorrectly set to STOPPED */
+	HW_ISSUE_TMIX_8438,
+
+	/* "Protected mode" is buggy on some Bifrost chips (this file lists it
+	 * for Mali-G31 r1p0), so the kernel must fiddle with L2 caches to
+	 * prevent data leakage */
+	HW_ISSUE_TGOX_R1_1234,
+
+	/* Must set SC_VAR_ALGORITHM */
+	HW_ISSUE_TTRX_2968_TTRX_3162,
+
+	/* Bus fault from occlusion query write may cause future fragment jobs
+	 * to hang */
+	HW_ISSUE_TTRX_3076,
+
+	/* Must issue a dummy job before starting real work to prevent hangs */
+	HW_ISSUE_TTRX_3485,
+
+	/* Terminator, not an issue: equals the number of enumerators above */
+	HW_ISSUE_END
+};
+
+/*
+ * Issue masks. Each macro below is either 0 or an OR of
+ * BIT_ULL(<enum panfrost_hw_issue value>) terms. The names follow a
+ * hw_issues_<model>[_<revision>] pattern; hw_issues_all carries no model
+ * suffix.
+ * NOTE(review): how the model and revision masks are combined is decided by
+ * the code that consumes these macros, which is not in this header.
+ */
+#define hw_issues_all (\
+	BIT_ULL(HW_ISSUE_9435))
+
+#define hw_issues_t600 (\
+	BIT_ULL(HW_ISSUE_6367) | \
+	BIT_ULL(HW_ISSUE_6787) | \
+	BIT_ULL(HW_ISSUE_8408) | \
+	BIT_ULL(HW_ISSUE_9510) | \
+	BIT_ULL(HW_ISSUE_10649) | \
+	BIT_ULL(HW_ISSUE_10676) | \
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11035) | \
+	BIT_ULL(HW_ISSUE_11056) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t600_r0p0_15dev0 (\
+	BIT_ULL(HW_ISSUE_8186) | \
+	BIT_ULL(HW_ISSUE_8245) | \
+	BIT_ULL(HW_ISSUE_8316) | \
+	BIT_ULL(HW_ISSUE_8394) | \
+	BIT_ULL(HW_ISSUE_8401) | \
+	BIT_ULL(HW_ISSUE_8443) | \
+	BIT_ULL(HW_ISSUE_8987) | \
+	BIT_ULL(HW_ISSUE_9630) | \
+	BIT_ULL(HW_ISSUE_10969) | \
+	BIT_ULL(GPUCORE_1619))
+
+#define hw_issues_t620 (\
+	BIT_ULL(HW_ISSUE_10649) | \
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_10959) | \
+	BIT_ULL(HW_ISSUE_11056) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t620_r0p1 (\
+	BIT_ULL(HW_ISSUE_10327) | \
+	BIT_ULL(HW_ISSUE_10676) | \
+	BIT_ULL(HW_ISSUE_10817) | \
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024) | \
+	BIT_ULL(HW_ISSUE_11035))
+
+#define hw_issues_t620_r1p0 (\
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024))
+
+#define hw_issues_t720 (\
+	BIT_ULL(HW_ISSUE_10649) | \
+	BIT_ULL(HW_ISSUE_10797) | \
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_11056) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t760 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t760_r0p0 (\
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024) | \
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t760_r0p1 (\
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024) | \
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t760_r0p1_50rel0 (\
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t760_r0p2 (\
+	BIT_ULL(HW_ISSUE_11020) | \
+	BIT_ULL(HW_ISSUE_11024) | \
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t760_r0p3 (\
+	BIT_ULL(HW_ISSUE_T76X_3542))
+
+#define hw_issues_t820 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t830 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t860 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_t880 (\
+	BIT_ULL(HW_ISSUE_10883) | \
+	BIT_ULL(HW_ISSUE_T76X_3953) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_g31 0
+
+#define hw_issues_g31_r1p0 (\
+	BIT_ULL(HW_ISSUE_TGOX_R1_1234))
+
+#define hw_issues_g51 0
+
+#define hw_issues_g52 0
+
+#define hw_issues_g71 (\
+	BIT_ULL(HW_ISSUE_TMIX_8463) | \
+	BIT_ULL(HW_ISSUE_TMIX_8438))
+
+#define hw_issues_g71_r0p0_05dev0 (\
+	BIT_ULL(HW_ISSUE_T76X_3953))
+
+#define hw_issues_g72 0
+
+#define hw_issues_g76 0
+
+#define hw_issues_g57 (\
+	BIT_ULL(HW_ISSUE_TTRX_2968_TTRX_3162) | \
+	BIT_ULL(HW_ISSUE_TTRX_3076))
+
+#define hw_issues_g57_r0p0 (\
+	BIT_ULL(HW_ISSUE_TTRX_3485))
+
+/* Return true when bit @issue is set in pfdev->features.hw_issues. */
+static inline bool panfrost_has_hw_issue(const struct panfrost_device *pfdev,
+					 enum panfrost_hw_issue issue)
+{
+	return test_bit(issue, pfdev->features.hw_issues);
+}
+
+#endif /* __PANFROST_ISSUES_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
new file mode 100644
index 0000000000..a8b4827dc4
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -0,0 +1,946 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/dma-resv.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/panfrost_drm.h>
+
+#include "panfrost_device.h"
+#include "panfrost_devfreq.h"
+#include "panfrost_job.h"
+#include "panfrost_features.h"
+#include "panfrost_issues.h"
+#include "panfrost_gem.h"
+#include "panfrost_regs.h"
+#include "panfrost_gpu.h"
+#include "panfrost_mmu.h"
+#include "panfrost_dump.h"
+
+#define JOB_TIMEOUT_MS 500
+
+/*
+ * MMIO accessors for the job registers, relative to dev->iomem.
+ * Every macro argument is parenthesized so that any expression may be
+ * passed safely.
+ */
+#define job_write(dev, reg, data) writel((data), (dev)->iomem + (reg))
+#define job_read(dev, reg) readl((dev)->iomem + (reg))
+
+/* Per-job-slot scheduling state (one instance per entry of NUM_JOB_SLOTS). */
+struct panfrost_queue_state {
+	/* DRM scheduler instance set up for this slot in panfrost_job_init() */
+	struct drm_gpu_scheduler sched;
+	/* Fence context obtained from dma_fence_context_alloc() for this slot */
+	u64 fence_context;
+	/* Last sequence number handed out; pre-incremented for each new fence */
+	u64 emit_seqno;
+};
+
+/* Job-manager state, allocated in panfrost_job_init() and stored in pfdev->js. */
+struct panfrost_job_slot {
+	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
+	/* Held around pfdev->jobs[][] updates; also the lock given to dma_fence_init() */
+	spinlock_t job_lock;
+	/* IRQ number of the platform interrupt named "job" */
+	int irq;
+};
+
+/* Map a scheduler job back to the panfrost_job that embeds it as ->base. */
+static struct panfrost_job *
+to_panfrost_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct panfrost_job, base);
+}
+
+/* Hardware completion fence, created per submission by panfrost_fence_create(). */
+struct panfrost_fence {
+	/* Embedded dma_fence; a pointer to it is what the rest of the driver sees */
+	struct dma_fence base;
+	struct drm_device *dev;
+	/* panfrost seqno for signaled() test */
+	u64 seqno;
+	/* Job slot index the fence was created for */
+	int queue;
+};
+
+/*
+ * Map a dma_fence back to the panfrost_fence that embeds it as ->base.
+ *
+ * container_of() is used instead of a raw pointer cast so the conversion is
+ * type-checked and stays correct even if ->base stops being the first member.
+ */
+static inline struct panfrost_fence *
+to_panfrost_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct panfrost_fence, base);
+}
+
+/* dma_fence_ops.get_driver_name callback: always returns the constant driver name. */
+static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "panfrost";
+}
+
+/*
+ * dma_fence_ops.get_timeline_name callback: one timeline name per job slot
+ * (0..2), NULL for any other queue index.
+ */
+static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
+{
+	static const char * const timeline_names[] = {
+		"panfrost-js-0",
+		"panfrost-js-1",
+		"panfrost-js-2",
+	};
+	int queue = to_panfrost_fence(fence)->queue;
+
+	if (queue < 0 || queue >= (int)ARRAY_SIZE(timeline_names))
+		return NULL;
+
+	return timeline_names[queue];
+}
+
+/* Fence operations for panfrost fences; only the two naming callbacks are set. */
+static const struct dma_fence_ops panfrost_fence_ops = {
+	.get_driver_name = panfrost_fence_get_driver_name,
+	.get_timeline_name = panfrost_fence_get_timeline_name,
+};
+
+/*
+ * Allocate and initialise a fence for job slot @js_num.
+ *
+ * The fence takes the slot's next sequence number and fence context, and uses
+ * js->job_lock as its lock. Returns the embedded dma_fence, or
+ * ERR_PTR(-ENOMEM) when the allocation fails.
+ */
+static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
+{
+	struct panfrost_job_slot *js = pfdev->js;
+	struct panfrost_queue_state *queue = &js->queue[js_num];
+	struct panfrost_fence *pf_fence = kzalloc(sizeof(*pf_fence), GFP_KERNEL);
+
+	if (!pf_fence)
+		return ERR_PTR(-ENOMEM);
+
+	pf_fence->dev = pfdev->ddev;
+	pf_fence->queue = js_num;
+	pf_fence->seqno = ++queue->emit_seqno;
+
+	dma_fence_init(&pf_fence->base, &panfrost_fence_ops, &js->job_lock,
+		       queue->fence_context, pf_fence->seqno);
+
+	return &pf_fence->base;
+}
+
+/*
+ * Pick the hardware job slot for @job from its requirement flags.
+ * As compiled today this returns 0 for PANFROST_JD_REQ_FS jobs and 1 for
+ * everything else; the slot 2 selection below is compiled out.
+ */
+int panfrost_job_get_slot(struct panfrost_job *job)
+{
+	/* JS0: fragment jobs.
+	 * JS1: vertex/tiler jobs
+	 * JS2: compute jobs
+	 */
+	if (job->requirements & PANFROST_JD_REQ_FS)
+		return 0;
+
+/* Not exposed to userspace yet */
+#if 0
+	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
+		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
+		    (job->pfdev->features.nr_core_groups == 2))
+			return 2;
+		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
+			return 2;
+	}
+#endif
+	return 1;
+}
+
+/*
+ * Program the core affinity for the next job on slot @js.
+ *
+ * @requirements is currently unused: every shader core reported in
+ * features.shader_present is selected. Tiler-only jobs and hardware with
+ * two coherent core groups may eventually need something smarter.
+ */
+static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
+					u32 requirements,
+					int js)
+{
+	const u64 all_cores = pfdev->features.shader_present;
+
+	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(all_cores));
+	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(all_cores));
+}
+
+/*
+ * Return JS_CONFIG_JOB_CHAIN_FLAG for jobs whose done fence has an odd
+ * sequence number, 0 otherwise. Always 0 on hardware without job chain
+ * disambiguation.
+ */
+static u32
+panfrost_get_job_chain_flag(const struct panfrost_job *job)
+{
+	const struct panfrost_fence *done;
+
+	if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
+		return 0;
+
+	done = to_panfrost_fence(job->done_fence);
+	if (done->seqno & 1)
+		return JS_CONFIG_JOB_CHAIN_FLAG;
+
+	return 0;
+}
+
+/*
+ * Pop the oldest tracked job of @slot: entry 1 moves down to entry 0 and
+ * entry 1 is cleared. Warns (and returns NULL) when the slot was empty.
+ */
+static struct panfrost_job *
+panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
+{
+	struct panfrost_job **entries = pfdev->jobs[slot];
+	struct panfrost_job *oldest = entries[0];
+
+	WARN_ON(!oldest);
+
+	entries[0] = entries[1];
+	entries[1] = NULL;
+
+	return oldest;
+}
+
+/*
+ * Track @job on @slot and return the sub-slot index it was stored in (0 or 1).
+ *
+ * A NULL job triggers a warning and returns 0 without storing anything.
+ * Storing into sub-slot 1 warns if that entry was already occupied, or if
+ * both tracked jobs carry the same job chain flag.
+ */
+static unsigned int
+panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
+		     struct panfrost_job *job)
+{
+	struct panfrost_job **entries = pfdev->jobs[slot];
+
+	if (WARN_ON(!job))
+		return 0;
+
+	if (entries[0]) {
+		WARN_ON(entries[1]);
+		entries[1] = job;
+		WARN_ON(panfrost_get_job_chain_flag(job) ==
+			panfrost_get_job_chain_flag(entries[0]));
+		return 1;
+	}
+
+	entries[0] = job;
+	return 0;
+}
+
+/*
+ * Program job slot @js with @job and start it.
+ *
+ * A runtime-PM reference and a devfreq "busy" record are taken for every job
+ * that gets tracked in pfdev->jobs[]; both are released when the job
+ * completes, fails, or is dropped during reset. If the job cannot be tracked
+ * (resume failure, or the _NEXT registers are unexpectedly in use), nothing
+ * later will balance them, so this function drops the PM reference itself and
+ * records devfreq busy only once the job is certain to be tracked.
+ */
+static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
+{
+	struct panfrost_device *pfdev = job->pfdev;
+	unsigned int subslot;
+	u32 cfg;
+	u64 jc_head = job->jc;
+	int ret;
+
+	/* pm_runtime_get_sync() bumps the usage count even when it fails */
+	ret = pm_runtime_get_sync(pfdev->dev);
+	if (ret < 0)
+		goto err_put_pm;
+
+	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
+		goto err_put_pm;
+
+	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);
+
+	cfg = panfrost_mmu_as_get(pfdev, job->mmu);
+
+	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
+	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));
+
+	panfrost_job_write_affinity(pfdev, job->requirements, js);
+
+	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
+	 * start */
+	cfg |= JS_CONFIG_THREAD_PRI(8) |
+		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
+		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
+		panfrost_get_job_chain_flag(job);
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
+		cfg |= JS_CONFIG_START_MMU;
+
+	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
+		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);
+
+	/* GO ! */
+
+	spin_lock(&pfdev->js->job_lock);
+	subslot = panfrost_enqueue_job(pfdev, js, job);
+	/* Don't queue the job if a reset is in progress */
+	if (!atomic_read(&pfdev->reset.pending)) {
+		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
+		dev_dbg(pfdev->dev,
+			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
+			job, js, subslot, jc_head, cfg & 0xf);
+	}
+	spin_unlock(&pfdev->js->job_lock);
+
+	return;
+
+err_put_pm:
+	/* The job was never tracked: release the reference taken above */
+	pm_runtime_put_autosuspend(pfdev->dev);
+}
+
+/*
+ * For each of the @bo_count objects in @bos, reserve one fence slot in its
+ * reservation object and add its implicit dependencies to @job.
+ *
+ * Returns 0 on success or the first error encountered; objects after the
+ * failing one are left untouched.
+ */
+static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
+					  int bo_count,
+					  struct drm_sched_job *job)
+{
+	int idx;
+
+	for (idx = 0; idx < bo_count; idx++) {
+		struct drm_gem_object *bo = bos[idx];
+		int err;
+
+		err = dma_resv_reserve_fences(bo->resv, 1);
+		if (!err) {
+			/* panfrost always uses write mode in its current uapi */
+			err = drm_sched_job_add_implicit_dependencies(job, bo,
+								      true);
+		}
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Add @fence with write usage to the reservation object of every BO in @bos. */
+static void panfrost_attach_object_fences(struct drm_gem_object **bos,
+					  int bo_count,
+					  struct dma_fence *fence)
+{
+	int idx = 0;
+
+	while (idx < bo_count) {
+		dma_resv_add_fence(bos[idx]->resv, fence, DMA_RESV_USAGE_WRITE);
+		idx++;
+	}
+}
+
+/*
+ * Arm @job and queue it on its scheduler entity.
+ *
+ * The reservations of all the job's BOs are locked for the duration of the
+ * call. pfdev->sched_lock is held from arming the job until it has been
+ * pushed. Once pushed, the scheduler's "finished" fence is attached to every
+ * BO with write usage.
+ *
+ * Returns 0 on success, or the error from locking the reservations or from
+ * acquiring the object fences. In the latter case the job is not pushed.
+ */
+int panfrost_job_push(struct panfrost_job *job)
+{
+	struct panfrost_device *pfdev = job->pfdev;
+	struct ww_acquire_ctx acquire_ctx;
+	int ret;
+
+	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
+					    &acquire_ctx);
+	if (ret)
+		return ret;
+
+	mutex_lock(&pfdev->sched_lock);
+	drm_sched_job_arm(&job->base);
+
+	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);
+
+	ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
+					     &job->base);
+	if (!ret) {
+		kref_get(&job->refcount); /* put by scheduler job completion */
+		drm_sched_entity_push_job(&job->base);
+	}
+	mutex_unlock(&pfdev->sched_lock);
+
+	if (!ret)
+		panfrost_attach_object_fences(job->bos, job->bo_count,
+					      job->render_done_fence);
+
+	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
+
+	return ret;
+}
+
+/*
+ * kref release callback: drop everything a job holds and free it.
+ *
+ * Releases both fences, every GEM mapping up to the first NULL entry
+ * (decrementing the owning object's gpu_usecount), every BO reference, the
+ * two arrays and finally the job itself.
+ */
+static void panfrost_job_cleanup(struct kref *ref)
+{
+	struct panfrost_job *job = container_of(ref, struct panfrost_job,
+						refcount);
+	unsigned int n;
+
+	dma_fence_put(job->done_fence);
+	dma_fence_put(job->render_done_fence);
+
+	if (job->mappings) {
+		/* The array may be only partially filled: stop at the first hole */
+		for (n = 0; n < job->bo_count && job->mappings[n]; n++) {
+			struct panfrost_gem_mapping *mapping = job->mappings[n];
+
+			atomic_dec(&mapping->obj->gpu_usecount);
+			panfrost_gem_mapping_put(mapping);
+		}
+		kvfree(job->mappings);
+	}
+
+	if (job->bos) {
+		for (n = 0; n < job->bo_count; n++)
+			drm_gem_object_put(job->bos[n]);
+
+		kvfree(job->bos);
+	}
+
+	kfree(job);
+}
+
+/* Drop one reference on @job; panfrost_job_cleanup() runs when the last one goes. */
+void panfrost_job_put(struct panfrost_job *job)
+{
+	kref_put(&job->refcount, panfrost_job_cleanup);
+}
+
+/*
+ * drm_sched .free_job callback: clean up the scheduler part of the job, then
+ * drop the reference taken in panfrost_job_push().
+ */
+static void panfrost_job_free(struct drm_sched_job *sched_job)
+{
+	struct panfrost_job *job = to_panfrost_job(sched_job);
+
+	drm_sched_job_cleanup(sched_job);
+
+	panfrost_job_put(job);
+}
+
+/*
+ * drm_sched .run_job callback.
+ *
+ * Returns NULL without touching the hardware when the scheduler fence
+ * already carries an error or when the job has no job chain left to run
+ * (->jc == 0, which can happen if it finished while the GPU was being
+ * reset). Otherwise a fresh hardware fence is created, stored in
+ * job->done_fence (replacing any previous one), the job is submitted, and
+ * the fence is returned. An ERR_PTR is returned when the fence cannot be
+ * created.
+ */
+static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
+{
+	struct panfrost_job *job = to_panfrost_job(sched_job);
+	int slot = panfrost_job_get_slot(job);
+	struct dma_fence *hw_fence;
+
+	if (unlikely(job->base.s_fence->finished.error) || !job->jc)
+		return NULL;
+
+	hw_fence = panfrost_fence_create(job->pfdev, slot);
+	if (IS_ERR(hw_fence))
+		return hw_fence;
+
+	/* dma_fence_put() accepts NULL, so no need to test the old fence */
+	dma_fence_put(job->done_fence);
+	job->done_fence = dma_fence_get(hw_fence);
+
+	panfrost_job_hw_submit(job, slot);
+
+	return hw_fence;
+}
+
+/* Clear, then unmask, the job interrupt bits of every job slot. */
+void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
+{
+	u32 all_slots = 0;
+	int slot;
+
+	for (slot = 0; slot < NUM_JOB_SLOTS; slot++)
+		all_slots |= MK_JS_MASK(slot);
+
+	job_write(pfdev, JOB_INT_CLEAR, all_slots);
+	job_write(pfdev, JOB_INT_MASK, all_slots);
+}
+
+/*
+ * Handle a job that raised the error interrupt on slot @js.
+ *
+ * The slot's JS_STATUS decides the outcome: a STOPPED job keeps its fence
+ * unsignalled and has ->jc updated to the tail pointer so it can be resumed;
+ * a TERMINATED job or a fault gets an error set on its fence and ->jc
+ * cleared. The address space, devfreq busy record and runtime-PM reference
+ * taken at submission are released in all cases. A scheduler fault is raised
+ * when the exception requires a GPU reset.
+ */
+static void panfrost_job_handle_err(struct panfrost_device *pfdev,
+				    struct panfrost_job *job,
+				    unsigned int js)
+{
+	u32 js_status = job_read(pfdev, JS_STATUS(js));
+	const char *exception_name = panfrost_exception_name(js_status);
+	bool signal_fence = true;
+
+	if (!panfrost_exception_is_fault(js_status)) {
+		dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
+			js, exception_name,
+			job_read(pfdev, JS_HEAD_LO(js)),
+			job_read(pfdev, JS_TAIL_LO(js)));
+	} else {
+		dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
+			js, exception_name,
+			job_read(pfdev, JS_HEAD_LO(js)),
+			job_read(pfdev, JS_TAIL_LO(js)));
+	}
+
+	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
+		/* Update the job head so we can resume */
+		job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
+			  ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);
+
+		/* The job will be resumed, don't signal the fence */
+		signal_fence = false;
+	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
+		/* Job has been hard-stopped, flag it as canceled */
+		dma_fence_set_error(job->done_fence, -ECANCELED);
+		job->jc = 0;
+	} else if (panfrost_exception_is_fault(js_status)) {
+		/* We might want to provide finer-grained error code based on
+		 * the exception type, but unconditionally setting to EINVAL
+		 * is good enough for now.
+		 */
+		dma_fence_set_error(job->done_fence, -EINVAL);
+		job->jc = 0;
+	}
+
+	panfrost_mmu_as_put(pfdev, job->mmu);
+	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
+
+	if (signal_fence)
+		dma_fence_signal_locked(job->done_fence);
+
+	pm_runtime_put_autosuspend(pfdev->dev);
+
+	if (panfrost_exception_needs_reset(pfdev, js_status)) {
+		/* Flag the reset first so nothing new gets started on the HW */
+		atomic_set(&pfdev->reset.pending, 1);
+		drm_sched_fault(&pfdev->js->queue[js].sched);
+	}
+}
+
+/*
+ * Complete @job: release its address space, devfreq busy record and
+ * runtime-PM reference, and signal its done fence.
+ */
+static void panfrost_job_handle_done(struct panfrost_device *pfdev,
+				     struct panfrost_job *job)
+{
+	/* Set ->jc to 0 to avoid re-submitting an already finished job (can
+	 * happen when we receive the DONE interrupt while doing a GPU reset).
+	 */
+	job->jc = 0;
+	panfrost_mmu_as_put(pfdev, job->mmu);
+	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
+
+	dma_fence_signal_locked(job->done_fence);
+	pm_runtime_put_autosuspend(pfdev->dev);
+}
+
+/*
+ * Process the job interrupt bits in @status. The caller
+ * (panfrost_job_handle_irqs()) holds js->job_lock.
+ *
+ * Works in three passes: collect the done/failed jobs while clearing
+ * interrupts until JOB_INT_RAWSTAT reads back 0, then handle the collected
+ * jobs, and finally restart or cancel any job left queued behind a failed
+ * one.
+ */
+static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
+{
+	struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
+	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
+	u32 js_state = 0, js_events = 0;
+	unsigned int i, j;
+
+	/* First we collect all failed/done jobs. */
+	while (status) {
+		u32 js_state_mask = 0;
+
+		for (j = 0; j < NUM_JOB_SLOTS; j++) {
+			if (status & MK_JS_MASK(j))
+				js_state_mask |= MK_JS_MASK(j);
+
+			if (status & JOB_INT_MASK_DONE(j)) {
+				if (done[j][0])
+					done[j][1] = panfrost_dequeue_job(pfdev, j);
+				else
+					done[j][0] = panfrost_dequeue_job(pfdev, j);
+			}
+
+			if (status & JOB_INT_MASK_ERR(j)) {
+				/* Cancel the next submission. Will be submitted
+				 * after we're done handling this failure if
+				 * there's no reset pending.
+				 */
+				job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
+				failed[j] = panfrost_dequeue_job(pfdev, j);
+			}
+		}
+
+		/* JS_STATE is sampled when JOB_INT_CLEAR is written.
+		 * For each BIT(slot) or BIT(slot + 16) bit written to
+		 * JOB_INT_CLEAR, the corresponding bits in JS_STATE
+		 * (BIT(slot) and BIT(slot + 16)) are updated, but this
+		 * is racy. If we only have one job done at the time we
+		 * read JOB_INT_RAWSTAT but the second job fails before we
+		 * clear the status, we end up with a status containing
+		 * only the DONE bit and consider both jobs as DONE since
+		 * JS_STATE reports both NEXT and CURRENT as inactive.
+		 * To prevent that, let's repeat these clear+read steps
+		 * until status is 0.
+		 */
+		job_write(pfdev, JOB_INT_CLEAR, status);
+		js_state &= ~js_state_mask;
+		js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
+		js_events |= status;
+		status = job_read(pfdev, JOB_INT_RAWSTAT);
+	}
+
+	/* Then we handle the dequeued jobs. */
+	for (j = 0; j < NUM_JOB_SLOTS; j++) {
+		if (!(js_events & MK_JS_MASK(j)))
+			continue;
+
+		if (failed[j]) {
+			panfrost_job_handle_err(pfdev, failed[j], j);
+		} else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
+			/* When the current job doesn't fail, the JM dequeues
+			 * the next job without waiting for an ACK, this means
+			 * we can have 2 jobs dequeued and only catch the
+			 * interrupt when the second one is done. If both slots
+			 * are inactive, but one job remains in pfdev->jobs[j],
+			 * consider it done. Of course that doesn't apply if a
+			 * failure happened since we cancelled execution of the
+			 * job in _NEXT (see above).
+			 */
+			if (WARN_ON(!done[j][0]))
+				done[j][0] = panfrost_dequeue_job(pfdev, j);
+			else
+				done[j][1] = panfrost_dequeue_job(pfdev, j);
+		}
+
+		for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
+			panfrost_job_handle_done(pfdev, done[j][i]);
+	}
+
+	/* And finally we requeue jobs that were waiting in the second slot
+	 * and have been stopped if we detected a failure on the first slot.
+	 */
+	for (j = 0; j < NUM_JOB_SLOTS; j++) {
+		if (!(js_events & MK_JS_MASK(j)))
+			continue;
+
+		if (!failed[j] || !pfdev->jobs[j][0])
+			continue;
+
+		if (pfdev->jobs[j][0]->jc == 0) {
+			/* The job was cancelled, signal the fence now */
+			struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);
+
+			dma_fence_set_error(canceled->done_fence, -ECANCELED);
+			panfrost_job_handle_done(pfdev, canceled);
+		} else if (!atomic_read(&pfdev->reset.pending)) {
+			/* Requeue the job we removed if no reset is pending */
+			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
+		}
+	}
+}
+
+/*
+ * Drain the job interrupts: keep handling JOB_INT_RAWSTAT under
+ * js->job_lock until it reads back as 0, marking the device busy for
+ * runtime PM on every round.
+ */
+static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
+{
+	u32 pending;
+
+	for (pending = job_read(pfdev, JOB_INT_RAWSTAT); pending;
+	     pending = job_read(pfdev, JOB_INT_RAWSTAT)) {
+		pm_runtime_mark_last_busy(pfdev->dev);
+
+		spin_lock(&pfdev->js->job_lock);
+		panfrost_job_handle_irq(pfdev, pending);
+		spin_unlock(&pfdev->js->job_lock);
+	}
+}
+
+/*
+ * Poll helper for the soft-stop wait in panfrost_reset().
+ *
+ * Returns the bits of @js_state still selected by *@js_state_mask. Before
+ * doing so, every slot with a raw interrupt pending is removed from
+ * *@js_state_mask, so slots that have raised an event no longer count as
+ * active on this or any later call.
+ */
+static u32 panfrost_active_slots(struct panfrost_device *pfdev,
+				 u32 *js_state_mask, u32 js_state)
+{
+	unsigned int slot;
+	u32 raised;
+
+	if (!(js_state & *js_state_mask))
+		return 0;
+
+	raised = job_read(pfdev, JOB_INT_RAWSTAT);
+	for (slot = 0; slot < NUM_JOB_SLOTS; slot++) {
+		if (raised & MK_JS_MASK(slot))
+			*js_state_mask &= ~MK_JS_MASK(slot);
+	}
+
+	return js_state & *js_state_mask;
+}
+
+/*
+ * Reset the GPU and restart scheduling. Does nothing unless
+ * pfdev->reset.pending is set.
+ *
+ * Sequence: stop all schedulers, mask and synchronize the job IRQ, soft-stop
+ * every slot, handle whatever interrupts remain, drop the PM/devfreq
+ * accounting of jobs still stuck in pfdev->jobs[], reset the device,
+ * resubmit the pending jobs, restart the schedulers and unmask the job IRQ.
+ *
+ * @bad: the job blamed for the reset, or NULL (as passed by
+ *       panfrost_reset_work()).
+ */
+static void
+panfrost_reset(struct panfrost_device *pfdev,
+	       struct drm_sched_job *bad)
+{
+	u32 js_state, js_state_mask = 0xffffffff;
+	unsigned int i, j;
+	bool cookie;
+	int ret;
+
+	if (!atomic_read(&pfdev->reset.pending))
+		return;
+
+	/* Stop the schedulers.
+	 *
+	 * FIXME: We temporarily get out of the dma_fence_signalling section
+	 * because the cleanup path generates lockdep splats when taking locks
+	 * to release job resources. We should rework the code to follow this
+	 * pattern:
+	 *
+	 *	try_lock
+	 *	if (locked)
+	 *		release
+	 *	else
+	 *		schedule_work_to_release_later
+	 */
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		drm_sched_stop(&pfdev->js->queue[i].sched, bad);
+
+	cookie = dma_fence_begin_signalling();
+
+	if (bad)
+		drm_sched_increase_karma(bad);
+
+	/* Mask job interrupts and synchronize to make sure we won't be
+	 * interrupted during our reset.
+	 */
+	job_write(pfdev, JOB_INT_MASK, 0);
+	synchronize_irq(pfdev->js->irq);
+
+	for (i = 0; i < NUM_JOB_SLOTS; i++) {
+		/* Cancel the next job and soft-stop the running job. */
+		job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
+		job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
+	}
+
+	/* Wait at most 10ms for soft-stops to complete */
+	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
+				 !panfrost_active_slots(pfdev, &js_state_mask, js_state),
+				 10, 10000);
+
+	if (ret)
+		dev_err(pfdev->dev, "Soft-stop failed\n");
+
+	/* Handle the remaining interrupts before we reset. */
+	panfrost_job_handle_irqs(pfdev);
+
+	/* Remaining interrupts have been handled, but we might still have
+	 * stuck jobs. Let's make sure the PM counters stay balanced by
+	 * manually calling pm_runtime_put_noidle() and
+	 * panfrost_devfreq_record_idle() for each stuck job.
+	 */
+	spin_lock(&pfdev->js->job_lock);
+	for (i = 0; i < NUM_JOB_SLOTS; i++) {
+		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
+			pm_runtime_put_noidle(pfdev->dev);
+			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
+		}
+	}
+	memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
+	spin_unlock(&pfdev->js->job_lock);
+
+	/* Proceed with reset now. */
+	panfrost_device_reset(pfdev);
+
+	/* panfrost_device_reset() unmasks job interrupts, but we want to
+	 * keep them masked a bit longer.
+	 */
+	job_write(pfdev, JOB_INT_MASK, 0);
+
+	/* GPU has been reset, we can clear the reset pending bit. */
+	atomic_set(&pfdev->reset.pending, 0);
+
+	/* Now resubmit jobs that were previously queued but didn't have a
+	 * chance to finish.
+	 * FIXME: We temporarily get out of the DMA fence signalling section
+	 * while resubmitting jobs because the job submission logic will
+	 * allocate memory with the GFP_KERNEL flag which can trigger memory
+	 * reclaim and exposes a lock ordering issue.
+	 */
+	dma_fence_end_signalling(cookie);
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
+	cookie = dma_fence_begin_signalling();
+
+	/* Restart the schedulers */
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		drm_sched_start(&pfdev->js->queue[i].sched, true);
+
+	/* Re-enable job interrupts now that everything has been restarted. */
+	job_write(pfdev, JOB_INT_MASK,
+		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
+		  GENMASK(NUM_JOB_SLOTS - 1, 0));
+
+	dma_fence_end_signalling(cookie);
+}
+
+/*
+ * drm_sched .timedout_job callback.
+ *
+ * Bails out if the job's done fence turns out to be signalled, either
+ * straight away or after waiting for a running job IRQ handler to finish.
+ * Otherwise logs the slot state, takes a core dump and resets the GPU with
+ * this job marked as the bad one. Always returns DRM_GPU_SCHED_STAT_NOMINAL.
+ */
+static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
+						     *sched_job)
+{
+	struct panfrost_job *job = to_panfrost_job(sched_job);
+	struct panfrost_device *pfdev = job->pfdev;
+	int js = panfrost_job_get_slot(job);
+
+	/*
+	 * If the GPU managed to complete this job's fence, the timeout is
+	 * spurious. Bail out.
+	 */
+	if (dma_fence_is_signaled(job->done_fence))
+		return DRM_GPU_SCHED_STAT_NOMINAL;
+
+	/*
+	 * Panfrost IRQ handler may take a long time to process an interrupt
+	 * if there is another IRQ handler hogging the processing.
+	 * For example, the HDMI encoder driver might be stuck in the IRQ
+	 * handler for a significant time in a case of bad cable connection.
+	 * In order to catch such cases and not report spurious Panfrost
+	 * job timeouts, synchronize the IRQ handler and re-check the fence
+	 * status.
+	 */
+	synchronize_irq(pfdev->js->irq);
+
+	if (dma_fence_is_signaled(job->done_fence)) {
+		dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n");
+		return DRM_GPU_SCHED_STAT_NOMINAL;
+	}
+
+	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
+		js,
+		job_read(pfdev, JS_CONFIG(js)),
+		job_read(pfdev, JS_STATUS(js)),
+		job_read(pfdev, JS_HEAD_LO(js)),
+		job_read(pfdev, JS_TAIL_LO(js)),
+		sched_job);
+
+	panfrost_core_dump(job);
+
+	/* panfrost_reset() is a no-op unless the pending flag is set */
+	atomic_set(&pfdev->reset.pending, 1);
+	panfrost_reset(pfdev, sched_job);
+
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+/* Work item handler for pfdev->reset.work: run a reset with no job to blame. */
+static void panfrost_reset_work(struct work_struct *work)
+{
+	struct panfrost_device *pfdev =
+		container_of(work, struct panfrost_device, reset.work);
+
+	panfrost_reset(pfdev, NULL);
+}
+
+/* Scheduler backend callbacks, passed to drm_sched_init() for every job slot. */
+static const struct drm_sched_backend_ops panfrost_sched_ops = {
+	.run_job = panfrost_job_run,
+	.timedout_job = panfrost_job_timedout,
+	.free_job = panfrost_job_free
+};
+
+/*
+ * Threaded half of the job IRQ: process all pending job interrupts, then
+ * unmask the job interrupts that the hard IRQ handler masked.
+ */
+static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
+{
+	struct panfrost_device *pfdev = data;
+
+	panfrost_job_handle_irqs(pfdev);
+	job_write(pfdev, JOB_INT_MASK,
+		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
+		  GENMASK(NUM_JOB_SLOTS - 1, 0));
+	return IRQ_HANDLED;
+}
+
+/*
+ * Hard half of the job IRQ (registered with IRQF_SHARED): returns IRQ_NONE
+ * when JOB_INT_STAT is 0, otherwise masks all job interrupts and wakes the
+ * threaded handler.
+ */
+static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
+{
+	struct panfrost_device *pfdev = data;
+	u32 status = job_read(pfdev, JOB_INT_STAT);
+
+	if (!status)
+		return IRQ_NONE;
+
+	job_write(pfdev, JOB_INT_MASK, 0);
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Set up the job manager: allocate the slot state, request the "job" IRQ,
+ * create the ordered reset workqueue and one DRM scheduler per job slot,
+ * then enable the job interrupts.
+ *
+ * Returns 0 on success or a negative errno. An error from
+ * platform_get_irq_byname() is passed through unchanged so that
+ * -EPROBE_DEFER reaches the driver core instead of being turned into
+ * -ENODEV.
+ */
+int panfrost_job_init(struct panfrost_device *pfdev)
+{
+	struct panfrost_job_slot *js;
+	unsigned int nentries = 2;
+	int ret, j;
+
+	/* All GPUs have two entries per queue, but without jobchain
+	 * disambiguation stopping the right job in the close path is tricky,
+	 * so let's just advertise one entry in that case.
+	 */
+	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
+		nentries = 1;
+
+	pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
+	if (!js)
+		return -ENOMEM;
+
+	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
+	spin_lock_init(&js->job_lock);
+
+	js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
+	if (js->irq <= 0) {
+		/* Keep the real error; 0 is not a valid IRQ number either */
+		return js->irq < 0 ? js->irq : -ENODEV;
+	}
+
+	ret = devm_request_threaded_irq(pfdev->dev, js->irq,
+					panfrost_job_irq_handler,
+					panfrost_job_irq_handler_thread,
+					IRQF_SHARED, KBUILD_MODNAME "-job",
+					pfdev);
+	if (ret) {
+		dev_err(pfdev->dev, "failed to request job irq");
+		return ret;
+	}
+
+	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
+	if (!pfdev->reset.wq)
+		return -ENOMEM;
+
+	for (j = 0; j < NUM_JOB_SLOTS; j++) {
+		js->queue[j].fence_context = dma_fence_context_alloc(1);
+
+		ret = drm_sched_init(&js->queue[j].sched,
+				     &panfrost_sched_ops,
+				     nentries, 0,
+				     msecs_to_jiffies(JOB_TIMEOUT_MS),
+				     pfdev->reset.wq,
+				     NULL, "pan_js", pfdev->dev);
+		if (ret) {
+			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
+			goto err_sched;
+		}
+	}
+
+	panfrost_job_enable_interrupts(pfdev);
+
+	return 0;
+
+err_sched:
+	/* Only tear down the schedulers that were successfully initialised */
+	for (j--; j >= 0; j--)
+		drm_sched_fini(&js->queue[j].sched);
+
+	destroy_workqueue(pfdev->reset.wq);
+	return ret;
+}
+
+/*
+ * Tear down the job manager: mask the job interrupts, destroy the per-slot
+ * schedulers, then cancel any pending reset work and destroy its workqueue.
+ */
+void panfrost_job_fini(struct panfrost_device *pfdev)
+{
+	struct panfrost_job_slot *js = pfdev->js;
+	int j;
+
+	job_write(pfdev, JOB_INT_MASK, 0);
+
+	for (j = 0; j < NUM_JOB_SLOTS; j++) {
+		drm_sched_fini(&js->queue[j].sched);
+	}
+
+	cancel_work_sync(&pfdev->reset.work);
+	destroy_workqueue(pfdev->reset.wq);
+}
+
+/*
+ * Per-file setup: create one normal-priority scheduler entity per job slot,
+ * each bound to that slot's scheduler.
+ *
+ * Returns 0 on success, or the first drm_sched_entity_init() error (after a
+ * warning). Entities created before the failure are not torn down here.
+ */
+int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
+{
+	struct panfrost_job_slot *js = panfrost_priv->pfdev->js;
+	int slot;
+
+	for (slot = 0; slot < NUM_JOB_SLOTS; slot++) {
+		struct drm_gpu_scheduler *sched = &js->queue[slot].sched;
+		int err;
+
+		err = drm_sched_entity_init(&panfrost_priv->sched_entity[slot],
+					    DRM_SCHED_PRIORITY_NORMAL, &sched,
+					    1, NULL);
+		if (WARN_ON(err))
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Per-file teardown: destroy the file's scheduler entities, then hard-stop
+ * any job belonging to one of them that is still tracked in pfdev->jobs[].
+ */
+void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
+{
+	struct panfrost_device *pfdev = panfrost_priv->pfdev;
+	int i;
+
+	for (i = 0; i < NUM_JOB_SLOTS; i++)
+		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
+
+	/* Kill in-flight jobs */
+	spin_lock(&pfdev->js->job_lock);
+	for (i = 0; i < NUM_JOB_SLOTS; i++) {
+		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
+		int j;
+
+		/* Walk from the queued (_NEXT) entry down to the running one */
+		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
+			struct panfrost_job *job = pfdev->jobs[i][j];
+			u32 cmd;
+
+			if (!job || job->base.entity != entity)
+				continue;
+
+			if (j == 1) {
+				/* Try to cancel the job before it starts */
+				job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
+				/* Reset the job head so it doesn't get restarted if
+				 * the job in the first slot failed.
+				 */
+				job->jc = 0;
+			}
+
+			/* With disambiguation, target the job by its chain flag */
+			if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+				cmd = panfrost_get_job_chain_flag(job) ?
+				      JS_COMMAND_HARD_STOP_1 :
+				      JS_COMMAND_HARD_STOP_0;
+			} else {
+				cmd = JS_COMMAND_HARD_STOP;
+			}
+
+			job_write(pfdev, JS_COMMAND(i), cmd);
+		}
+	}
+	spin_unlock(&pfdev->js->job_lock);
+}
+
+/*
+ * Return 1 when no scheduler has jobs in its hardware queue, 0 as soon as
+ * one slot reports a non-zero hw_rq_count.
+ */
+int panfrost_job_is_idle(struct panfrost_device *pfdev)
+{
+	struct panfrost_job_slot *js = pfdev->js;
+	int slot;
+
+	for (slot = 0; slot < NUM_JOB_SLOTS; slot++) {
+		/* If there are any jobs in the HW queue, we're not idle */
+		if (atomic_read(&js->queue[slot].sched.hw_rq_count) != 0)
+			return 0;
+	}
+
+	return 1;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
new file mode 100644
index 0000000000..8becc1ba0e
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANFROST_JOB_H__
+#define __PANFROST_JOB_H__
+
+#include <uapi/drm/panfrost_drm.h>
+#include <drm/gpu_scheduler.h>
+
+struct panfrost_device;
+struct panfrost_gem_object;
+struct panfrost_file_priv;
+
+/*
+ * Driver-side description of one GPU job. It embeds the drm_sched_job handed
+ * to the DRM scheduler and carries a kref in @refcount.
+ */
+struct panfrost_job {
+	struct drm_sched_job base;
+
+	struct kref refcount;
+
+	struct panfrost_device *pfdev;
+	struct panfrost_mmu *mmu;
+
+	/* Fence to be signaled by IRQ handler when the job is complete. */
+	struct dma_fence *done_fence;
+
+	/*
+	 * Job head; panfrost_job_close() zeroes it so that a cancelled job
+	 * does not get restarted.
+	 */
+	__u64 jc;
+	__u32 requirements;
+	__u32 flush_id;
+
+	/* NOTE(review): presumably bo_count entries in each array — confirm. */
+	struct panfrost_gem_mapping **mappings;
+	struct drm_gem_object **bos;
+	u32 bo_count;
+
+	/* Fence to be signaled by drm-sched once it's done with the job */
+	struct dma_fence *render_done_fence;
+};
+
+int panfrost_job_init(struct panfrost_device *pfdev);
+void panfrost_job_fini(struct panfrost_device *pfdev);
+int panfrost_job_open(struct panfrost_file_priv *panfrost_priv);
+void panfrost_job_close(struct panfrost_file_priv *panfrost_priv);
+int panfrost_job_get_slot(struct panfrost_job *job);
+int panfrost_job_push(struct panfrost_job *job);
+void panfrost_job_put(struct panfrost_job *job);
+void panfrost_job_enable_interrupts(struct panfrost_device *pfdev);
+int panfrost_job_is_idle(struct panfrost_device *pfdev);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
new file mode 100644
index 0000000000..c0123d09f6
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -0,0 +1,778 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#include <drm/panfrost_drm.h>
+
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/io-pgtable.h>
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/shmem_fs.h>
+#include <linux/sizes.h>
+
+#include "panfrost_device.h"
+#include "panfrost_mmu.h"
+#include "panfrost_gem.h"
+#include "panfrost_features.h"
+#include "panfrost_regs.h"
+
+#define mmu_write(dev, reg, data) writel(data, dev->iomem + reg)
+#define mmu_read(dev, reg) readl(dev->iomem + reg)
+
+/*
+ * Poll AS_STATUS of address space @as_nr until the AS_ACTIVE bit clears,
+ * i.e. until no MMU command is pending on it.
+ *
+ * Returns 0 once the bit is clear. If the poll fails, a device reset is
+ * scheduled, an error is logged and the poll's error code is returned.
+ */
+static int wait_ready(struct panfrost_device *pfdev, u32 as_nr)
+{
+	u32 as_status;
+	int err;
+
+	err = readl_relaxed_poll_timeout_atomic(pfdev->iomem + AS_STATUS(as_nr),
+						as_status,
+						!(as_status & AS_STATUS_AS_ACTIVE),
+						10, 100000);
+	if (!err)
+		return 0;
+
+	/* The bit never cleared: treat the GPU as hung and ask for a reset. */
+	panfrost_device_schedule_reset(pfdev);
+	dev_err(pfdev->dev, "AS_ACTIVE bit stuck\n");
+
+	return err;
+}
+
+/*
+ * Write @cmd to AS_COMMAND of address space @as_nr once the MMU is ready to
+ * accept another command.
+ *
+ * Returns 0 when the command was written, or the error from wait_ready()
+ * (in which case nothing is written).
+ */
+static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd)
+{
+	int err = wait_ready(pfdev, as_nr);
+
+	if (err)
+		return err;
+
+	mmu_write(pfdev, AS_COMMAND(as_nr), cmd);
+	return 0;
+}
+
+/*
+ * Program the lock address of address space @as_nr so that it covers
+ * [@region_start, @region_start + @size) and issue AS_COMMAND_LOCK.
+ *
+ * Nothing is done when @size is zero. The result of the LOCK command write
+ * is not checked here.
+ */
+static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
+			u64 region_start, u64 size)
+{
+	u8 region_width;
+	u64 region;
+	u64 region_end = region_start + size;
+
+	if (!size)
+		return;
+
+	/*
+	 * The locked region is a naturally aligned power of 2 block encoded as
+	 * log2 minus(1).
+	 * Calculate the desired start/end and look for the highest bit which
+	 * differs. The smallest naturally aligned block must include this bit
+	 * change, the desired region starts with this bit (and subsequent bits)
+	 * zeroed and ends with the bit (and subsequent bits) set to one.
+	 */
+	region_width = max(fls64(region_start ^ (region_end - 1)),
+			   const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1;
+
+	/*
+	 * Mask off the low bits of region_start (which would be ignored by
+	 * the hardware anyway)
+	 */
+	region_start &= GENMASK_ULL(63, region_width);
+
+	/* The width encoding shares the register with the aligned start. */
+	region = region_width | region_start;
+
+	/* Lock the region that needs to be updated */
+	mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region));
+	mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region));
+	write_cmd(pfdev, as_nr, AS_COMMAND_LOCK);
+}
+
+
+/*
+ * Run MMU command @op on address space @as_nr for the range
+ * [@iova, @iova + @size) and wait for the address space to become ready
+ * again. Callers in this file hold pfdev->as_lock around it.
+ *
+ * Returns 0 without touching the hardware when @as_nr is negative; otherwise
+ * returns the result of the final wait_ready(). The result of writing @op
+ * itself is not checked.
+ */
+static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr,
+				      u64 iova, u64 size, u32 op)
+{
+	if (as_nr < 0)
+		return 0;
+
+	/* Every command except UNLOCK first locks the affected region. */
+	if (op != AS_COMMAND_UNLOCK)
+		lock_region(pfdev, as_nr, iova, size);
+
+	/* Run the MMU operation */
+	write_cmd(pfdev, as_nr, op);
+
+	/* Wait for the flush to complete */
+	return wait_ready(pfdev, as_nr);
+}
+
+/*
+ * Locked wrapper around mmu_hw_do_operation_locked(): takes pfdev->as_lock,
+ * runs @op on the address space currently assigned to @mmu and returns the
+ * wrapped function's result.
+ */
+static int mmu_hw_do_operation(struct panfrost_device *pfdev,
+			       struct panfrost_mmu *mmu,
+			       u64 iova, u64 size, u32 op)
+{
+	int err;
+
+	spin_lock(&pfdev->as_lock);
+	/* mmu->as is sampled while the lock is held. */
+	err = mmu_hw_do_operation_locked(pfdev, mmu->as, iova, size, op);
+	spin_unlock(&pfdev->as_lock);
+
+	return err;
+}
+
+/*
+ * Program the address space assigned to @mmu (mmu->as) with the translation
+ * table and memory attributes taken from its io-pgtable configuration.
+ *
+ * The whole address range is flushed first (AS_COMMAND_FLUSH_MEM), then
+ * TRANSTAB and MEMATTR are written and latched with AS_COMMAND_UPDATE.
+ * Command results are not checked.
+ */
+static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
+{
+	int as_nr = mmu->as;
+	struct io_pgtable_cfg *cfg = &mmu->pgtbl_cfg;
+	u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
+	u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
+
+	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
+
+	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab));
+	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab));
+
+	/* Need to revisit mem attrs.
+	 * NC is the default, Mali driver is inner WT.
+	 */
+	mmu_write(pfdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr));
+	mmu_write(pfdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr));
+
+	write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
+}
+
+/*
+ * Disable address space @as_nr: flush the whole address range, zero the
+ * TRANSTAB and MEMATTR registers and latch the change with
+ * AS_COMMAND_UPDATE. Command results are not checked.
+ */
+static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
+{
+	mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
+
+	mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0);
+	mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0);
+
+	mmu_write(pfdev, AS_MEMATTR_LO(as_nr), 0);
+	mmu_write(pfdev, AS_MEMATTR_HI(as_nr), 0);
+
+	write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
+}
+
+/*
+ * Take a usage reference on the hardware address space (AS) backing @mmu,
+ * assigning one first if @mmu has none, and return its index.
+ *
+ * If @mmu already owns an AS, as_count is incremented, the context is moved
+ * to the head of the LRU list and, if the AS was flagged faulty, it is
+ * re-enabled. Otherwise a free AS is picked from as_alloc_mask; when none
+ * is present, the AS of the least recently used context with a zero
+ * as_count is reclaimed. The new AS is then enabled with as_count set to 1.
+ *
+ * Everything runs under pfdev->as_lock.
+ */
+u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
+{
+	int as;
+
+	spin_lock(&pfdev->as_lock);
+
+	as = mmu->as;
+	if (as >= 0) {
+		int en = atomic_inc_return(&mmu->as_count);
+		u32 mask = BIT(as) | BIT(16 + as);
+
+		/*
+		 * AS can be retained by active jobs or a perfcnt context,
+		 * hence the '+ 1' here.
+		 */
+		WARN_ON(en >= (NUM_JOB_SLOTS + 1));
+
+		list_move(&mmu->list, &pfdev->as_lru_list);
+
+		if (pfdev->as_faulty_mask & mask) {
+			/* Unhandled pagefault on this AS, the MMU was
+			 * disabled. We need to re-enable the MMU after
+			 * clearing+unmasking the AS interrupts.
+			 */
+			mmu_write(pfdev, MMU_INT_CLEAR, mask);
+			/*
+			 * NOTE(review): the value written here is computed
+			 * before this AS's bits are removed from
+			 * as_faulty_mask on the next line, so those bits are
+			 * still cleared in what gets written — confirm this
+			 * ordering is intended.
+			 */
+			mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask);
+			pfdev->as_faulty_mask &= ~mask;
+			panfrost_mmu_enable(pfdev, mmu);
+		}
+
+		goto out;
+	}
+
+	/* Check for a free AS */
+	as = ffz(pfdev->as_alloc_mask);
+	if (!(BIT(as) & pfdev->features.as_present)) {
+		struct panfrost_mmu *lru_mmu;
+
+		/* Scan from the LRU tail for a context nobody is using. */
+		list_for_each_entry_reverse(lru_mmu, &pfdev->as_lru_list, list) {
+			if (!atomic_read(&lru_mmu->as_count))
+				break;
+		}
+		/* Fires if the scan reached the list head without a match. */
+		WARN_ON(&lru_mmu->list == &pfdev->as_lru_list);
+
+		list_del_init(&lru_mmu->list);
+		as = lru_mmu->as;
+
+		WARN_ON(as < 0);
+		lru_mmu->as = -1;
+	}
+
+	/* Assign the free or reclaimed AS to the FD */
+	mmu->as = as;
+	set_bit(as, &pfdev->as_alloc_mask);
+	atomic_set(&mmu->as_count, 1);
+	list_add(&mmu->list, &pfdev->as_lru_list);
+
+	dev_dbg(pfdev->dev, "Assigned AS%d to mmu %p, alloc_mask=%lx", as, mmu, pfdev->as_alloc_mask);
+
+	panfrost_mmu_enable(pfdev, mmu);
+
+out:
+	spin_unlock(&pfdev->as_lock);
+	return as;
+}
+
+/*
+ * Drop one usage reference on the address space of @mmu and warn if the
+ * counter is observed below zero afterwards. @pfdev is unused.
+ */
+void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
+{
+	atomic_t *users = &mmu->as_count;
+
+	atomic_dec(users);
+	WARN_ON(atomic_read(users) < 0);
+}
+
+/*
+ * Forget all address space assignments and re-arm the MMU interrupts.
+ *
+ * Under pfdev->as_lock the allocation and faulty masks are cleared and every
+ * context on the LRU list is detached (as = -1, as_count = 0, removed from
+ * the list). Afterwards all MMU interrupt bits are cleared and then written
+ * as ones to MMU_INT_MASK.
+ */
+void panfrost_mmu_reset(struct panfrost_device *pfdev)
+{
+	struct panfrost_mmu *mmu, *mmu_tmp;
+
+	spin_lock(&pfdev->as_lock);
+
+	pfdev->as_alloc_mask = 0;
+	pfdev->as_faulty_mask = 0;
+
+	/* _safe variant: entries are unlinked while iterating. */
+	list_for_each_entry_safe(mmu, mmu_tmp, &pfdev->as_lru_list, list) {
+		mmu->as = -1;
+		atomic_set(&mmu->as_count, 0);
+		list_del_init(&mmu->list);
+	}
+
+	spin_unlock(&pfdev->as_lock);
+
+	mmu_write(pfdev, MMU_INT_CLEAR, ~0);
+	mmu_write(pfdev, MMU_INT_MASK, ~0);
+}
+
+/*
+ * Choose the page size (4K or 2M) for the next map/unmap call at @addr with
+ * @size bytes remaining, and store in @count how many pages of that size may
+ * be handled in one call.
+ *
+ * io-pgtable only handles several pages at once within a single table
+ * entry, so a run is cut at the next boundary of the block size one level
+ * up (2M for 4K pages, 1G for 2M pages). For a power-of-two block size B,
+ * the distance from @addr to the next B boundary is -addr % B in unsigned
+ * arithmetic (zero when @addr is already aligned).
+ */
+static size_t get_pgsize(u64 addr, size_t size, size_t *count)
+{
+	size_t to_boundary = -addr % SZ_2M;
+
+	/* 2M pages need a 2M-aligned address and at least 2M left to map. */
+	if (!to_boundary && size >= SZ_2M) {
+		/* Aligned on 1G already: a full 1G worth of 2M pages fits. */
+		to_boundary = -addr % SZ_1G ?: SZ_1G;
+		*count = min(to_boundary, size) / SZ_2M;
+		return SZ_2M;
+	}
+
+	/* 4K pages up to the next 2M boundary, or up to @size if smaller. */
+	*count = min_not_zero(to_boundary, size) / SZ_4K;
+	return SZ_4K;
+}
+
+/*
+ * Flush the page tables of @mmu for [@iova, @iova + @size).
+ *
+ * Nothing happens when @mmu has no address space assigned. The flush is
+ * only issued if the device is runtime-active; a runtime-PM reference is
+ * held (without resuming the device) around that check and the flush.
+ */
+static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+				     struct panfrost_mmu *mmu,
+				     u64 iova, u64 size)
+{
+	struct device *dev = pfdev->dev;
+
+	if (mmu->as < 0)
+		return;
+
+	pm_runtime_get_noresume(dev);
+
+	/* Only touch the hardware when it is already awake. */
+	if (pm_runtime_active(dev))
+		mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
+
+	pm_runtime_put_autosuspend(dev);
+}
+
+/*
+ * Map every DMA segment of @sgt into the page tables of @mmu, starting at
+ * GPU address @iova with protection @prot, then flush the page tables for
+ * the range that was walked.
+ *
+ * Always returns 0; the result of map_pages() is not checked.
+ */
+static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
+		      u64 iova, int prot, struct sg_table *sgt)
+{
+	unsigned int count;
+	struct scatterlist *sgl;
+	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
+	u64 start_iova = iova;
+
+	for_each_sgtable_dma_sg(sgt, sgl, count) {
+		unsigned long paddr = sg_dma_address(sgl);
+		size_t len = sg_dma_len(sgl);
+
+		dev_dbg(pfdev->dev, "map: as=%d, iova=%llx, paddr=%lx, len=%zx", mmu->as, iova, paddr, len);
+
+		while (len) {
+			size_t pgcount, mapped = 0;
+			/* OR-ing both addresses makes the alignment check cover each. */
+			size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
+
+			ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
+				       GFP_KERNEL, &mapped);
+			/* Don't get stuck if things have gone wrong */
+			mapped = max(mapped, pgsize);
+			iova += mapped;
+			paddr += mapped;
+			len -= mapped;
+		}
+	}
+
+	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
+
+	return 0;
+}
+
+/*
+ * Map the pages backing @mapping's BO at the GPU address reserved in
+ * mapping->mmnode and mark the mapping active.
+ *
+ * Returns 0 on success, and also (after a warning) when the mapping is
+ * already active. Returns the error from drm_gem_shmem_get_pages_sgt(),
+ * after a warning, when the scatter-gather table cannot be obtained.
+ */
+int panfrost_mmu_map(struct panfrost_gem_mapping *mapping)
+{
+	struct panfrost_gem_object *bo = mapping->obj;
+	struct drm_gem_shmem_object *shmem = &bo->base;
+	struct panfrost_device *pfdev = to_panfrost_device(shmem->base.dev);
+	int prot = IOMMU_READ | IOMMU_WRITE;
+	struct sg_table *pages_sgt;
+
+	if (WARN_ON(mapping->active))
+		return 0;
+
+	/* Non-executable BOs additionally get IOMMU_NOEXEC. */
+	prot |= bo->noexec ? IOMMU_NOEXEC : 0;
+
+	pages_sgt = drm_gem_shmem_get_pages_sgt(shmem);
+	if (WARN_ON(IS_ERR(pages_sgt)))
+		return PTR_ERR(pages_sgt);
+
+	/* mmnode.start is a page index; convert it to a GPU address. */
+	mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT,
+		   prot, pages_sgt);
+	mapping->active = true;
+
+	return 0;
+}
+
+/*
+ * Remove @mapping's range (mmnode.start/size, in pages) from the page tables
+ * of its MMU context, flush the page tables for that range and mark the
+ * mapping inactive. Warns and returns early if the mapping is not active.
+ */
+void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping)
+{
+	struct panfrost_gem_object *bo = mapping->obj;
+	struct drm_gem_object *obj = &bo->base.base;
+	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
+	struct io_pgtable_ops *ops = mapping->mmu->pgtbl_ops;
+	u64 iova = mapping->mmnode.start << PAGE_SHIFT;
+	size_t len = mapping->mmnode.size << PAGE_SHIFT;
+	size_t unmapped_len = 0;
+
+	if (WARN_ON(!mapping->active))
+		return;
+
+	dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx",
+		mapping->mmu->as, iova, len);
+
+	while (unmapped_len < len) {
+		size_t unmapped_page, pgcount;
+		size_t pgsize = get_pgsize(iova, len - unmapped_len, &pgcount);
+
+		/*
+		 * Heap BOs are handled one page at a time, and a page is
+		 * only unmapped when iova_to_phys() finds a translation
+		 * for it.
+		 */
+		if (bo->is_heap)
+			pgcount = 1;
+		if (!bo->is_heap || ops->iova_to_phys(ops, iova)) {
+			unmapped_page = ops->unmap_pages(ops, iova, pgsize, pgcount, NULL);
+			WARN_ON(unmapped_page != pgsize * pgcount);
+		}
+		/* Advance even when nothing was unmapped for this step. */
+		iova += pgsize * pgcount;
+		unmapped_len += pgsize * pgcount;
+	}
+
+	panfrost_mmu_flush_range(pfdev, mapping->mmu,
+				 mapping->mmnode.start << PAGE_SHIFT, len);
+	mapping->active = false;
+}
+
+/* tlb_flush_all callback handed to io-pgtable: a no-op in this driver. */
+static void mmu_tlb_inv_context_s1(void *cookie)
+{}
+
+/* Currently does nothing; see the TODO below. */
+static void mmu_tlb_sync_context(void *cookie)
+{
+	//struct panfrost_mmu *mmu = cookie;
+	// TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X
+}
+
+/*
+ * tlb_flush_walk callback handed to io-pgtable: ignores the range arguments
+ * and defers to mmu_tlb_sync_context().
+ */
+static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule,
+			       void *cookie)
+{
+	mmu_tlb_sync_context(cookie);
+}
+
+/* Flush callbacks referenced by the io_pgtable_cfg of every MMU context. */
+static const struct iommu_flush_ops mmu_tlb_ops = {
+	.tlb_flush_all	= mmu_tlb_inv_context_s1,
+	.tlb_flush_walk = mmu_tlb_flush_walk,
+};
+
+/*
+ * Find the GEM mapping that covers GPU address @addr in the MMU context
+ * currently bound to address space @as.
+ *
+ * The context is looked up on the LRU list under pfdev->as_lock; its
+ * drm_mm nodes are then scanned under the context's mm_lock. On a match a
+ * reference is taken on the mapping before it is returned; the caller must
+ * drop it. Returns NULL when no context or no node matches.
+ */
+static struct panfrost_gem_mapping *
+addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr)
+{
+	struct panfrost_gem_mapping *found = NULL;
+	struct panfrost_mmu *owner = NULL;
+	struct panfrost_mmu *cur;
+	struct drm_mm_node *node;
+	u64 pfn = addr >> PAGE_SHIFT;
+
+	spin_lock(&pfdev->as_lock);
+
+	list_for_each_entry(cur, &pfdev->as_lru_list, list) {
+		if (cur->as == as) {
+			owner = cur;
+			break;
+		}
+	}
+
+	if (owner) {
+		spin_lock(&owner->mm_lock);
+
+		drm_mm_for_each_node(node, &owner->mm) {
+			/* Node ranges are expressed in pages, like pfn. */
+			if (pfn < node->start ||
+			    pfn >= (node->start + node->size))
+				continue;
+
+			found = drm_mm_node_to_panfrost_mapping(node);
+			kref_get(&found->refcount);
+			break;
+		}
+
+		spin_unlock(&owner->mm_lock);
+	}
+
+	spin_unlock(&pfdev->as_lock);
+	return found;
+}
+
+#define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE)
+
+/*
+ * Handle a GPU page fault at @addr in address space @as by populating and
+ * mapping the 2MB section of the heap BO that contains the address.
+ *
+ * The mapping covering @addr is looked up (taking a reference that is
+ * dropped before returning). On the first fault for a BO the per-section
+ * sg_table array and the page pointer array are allocated. The section's
+ * pages are then read from shmem, turned into a scatter-gather table,
+ * DMA-mapped and mapped into the GPU page tables.
+ *
+ * On failure, pages and arrays obtained so far are deliberately left
+ * attached to the BO: dropping the pages reference here would unbalance it
+ * for sections mapped by earlier faults. They stay in place for BO teardown,
+ * and a later fault on the same section skips pages that are already
+ * present.
+ *
+ * Returns 0 on success or when the section is already populated, -ENOENT
+ * when no mapping covers @addr, -EINVAL when the BO is not a heap BO,
+ * -ENOMEM on allocation failure, or the error reported by
+ * shmem_read_mapping_page(), sg_alloc_table_from_pages() or
+ * dma_map_sgtable().
+ */
+static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
+				       u64 addr)
+{
+	int ret, i;
+	struct panfrost_gem_mapping *bomapping;
+	struct panfrost_gem_object *bo;
+	struct address_space *mapping;
+	struct drm_gem_object *obj;
+	pgoff_t page_offset;
+	struct sg_table *sgt;
+	struct page **pages;
+
+	bomapping = addr_to_mapping(pfdev, as, addr);
+	if (!bomapping)
+		return -ENOENT;
+
+	bo = bomapping->obj;
+	if (!bo->is_heap) {
+		dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)",
+			 bomapping->mmnode.start << PAGE_SHIFT);
+		ret = -EINVAL;
+		goto err_bo;
+	}
+	WARN_ON(bomapping->mmu->as != as);
+
+	/* Assume 2MB alignment and size multiple */
+	addr &= ~((u64)SZ_2M - 1);
+	/* Index of the section's first page, relative to the BO start. */
+	page_offset = addr >> PAGE_SHIFT;
+	page_offset -= bomapping->mmnode.start;
+
+	obj = &bo->base.base;
+
+	dma_resv_lock(obj->resv, NULL);
+
+	if (!bo->base.pages) {
+		/* First fault on this BO: allocate the bookkeeping arrays. */
+		bo->sgts = kvmalloc_array(bo->base.base.size / SZ_2M,
+				     sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO);
+		if (!bo->sgts) {
+			ret = -ENOMEM;
+			goto err_unlock;
+		}
+
+		pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT,
+				       sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
+		if (!pages) {
+			kvfree(bo->sgts);
+			bo->sgts = NULL;
+			ret = -ENOMEM;
+			goto err_unlock;
+		}
+		bo->base.pages = pages;
+		bo->base.pages_use_count = 1;
+	} else {
+		pages = bo->base.pages;
+		if (pages[page_offset]) {
+			/* Pages are already mapped, bail out. */
+			goto out;
+		}
+	}
+
+	mapping = bo->base.base.filp->f_mapping;
+	mapping_set_unevictable(mapping);
+
+	for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+		/*
+		 * Can happen if an earlier fault only partially filled this
+		 * section of the pages array before failing. Skip pages that
+		 * are already present instead of reading them again.
+		 */
+		if (pages[i])
+			continue;
+
+		pages[i] = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(pages[i])) {
+			ret = PTR_ERR(pages[i]);
+			pages[i] = NULL;
+			goto err_unlock;
+		}
+	}
+
+	sgt = &bo->sgts[page_offset / (SZ_2M / PAGE_SIZE)];
+	ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
+					NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
+	if (ret)
+		goto err_unlock;
+
+	ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0);
+	if (ret)
+		goto err_map;
+
+	mmu_map_sg(pfdev, bomapping->mmu, addr,
+		   IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
+
+	bomapping->active = true;
+
+	dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
+
+out:
+	dma_resv_unlock(obj->resv);
+
+	panfrost_gem_mapping_put(bomapping);
+
+	return 0;
+
+err_map:
+	sg_free_table(sgt);
+err_unlock:
+	dma_resv_unlock(obj->resv);
+err_bo:
+	panfrost_gem_mapping_put(bomapping);
+	return ret;
+}
+
+/*
+ * kref release callback for a panfrost_mmu context.
+ *
+ * If the context still owns an address space, it is disabled in hardware
+ * (only when the device is runtime-active), its bit is cleared in the
+ * allocation and in-use masks and the context is removed from the LRU list,
+ * all under pfdev->as_lock. The page-table ops, the drm_mm allocator and the
+ * context itself are then freed.
+ */
+static void panfrost_mmu_release_ctx(struct kref *kref)
+{
+	struct panfrost_mmu *mmu = container_of(kref, struct panfrost_mmu,
+						refcount);
+	struct panfrost_device *pfdev = mmu->pfdev;
+
+	spin_lock(&pfdev->as_lock);
+	if (mmu->as >= 0) {
+		/* Hold a PM reference without waking the device up. */
+		pm_runtime_get_noresume(pfdev->dev);
+		if (pm_runtime_active(pfdev->dev))
+			panfrost_mmu_disable(pfdev, mmu->as);
+		pm_runtime_put_autosuspend(pfdev->dev);
+
+		clear_bit(mmu->as, &pfdev->as_alloc_mask);
+		clear_bit(mmu->as, &pfdev->as_in_use_mask);
+		list_del(&mmu->list);
+	}
+	spin_unlock(&pfdev->as_lock);
+
+	free_io_pgtable_ops(mmu->pgtbl_ops);
+	drm_mm_takedown(&mmu->mm);
+	kfree(mmu);
+}
+
+/*
+ * Drop a reference on @mmu; the context is destroyed through
+ * panfrost_mmu_release_ctx() when the last reference goes away.
+ */
+void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu)
+{
+	kref_put(&mmu->refcount, panfrost_mmu_release_ctx);
+}
+
+/* Take an additional reference on @mmu and return the same pointer. */
+struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu)
+{
+	kref_get(&mmu->refcount);
+
+	return mmu;
+}
+
+#define PFN_4G		(SZ_4G >> PAGE_SHIFT)
+#define PFN_4G_MASK	(PFN_4G - 1)
+#define PFN_16M		(SZ_16M >> PAGE_SHIFT)
+
+/*
+ * drm_mm color_adjust callback (installed in panfrost_mmu_ctx_create()).
+ *
+ * @color carries the BO flags. For buffers without PANFROST_BO_NOEXEC the
+ * candidate hole [*start, *end), expressed in pages, is narrowed so that it
+ * neither starts nor ends on a 4GB boundary and does not extend past the
+ * next 4GB boundary. Holes for NOEXEC buffers are left untouched.
+ */
+static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node,
+					 unsigned long color,
+					 u64 *start, u64 *end)
+{
+	/* Executable buffers can't start or end on a 4GB boundary */
+	if (!(color & PANFROST_BO_NOEXEC)) {
+		u64 next_seg;
+
+		if ((*start & PFN_4G_MASK) == 0)
+			(*start)++;
+
+		if ((*end & PFN_4G_MASK) == 0)
+			(*end)--;
+
+		/*
+		 * If 16MB or less remain before the next 4GB boundary, move
+		 * the start just past that boundary instead.
+		 */
+		next_seg = ALIGN(*start, PFN_4G);
+		if (next_seg - *start <= PFN_16M)
+			*start = next_seg + 1;
+
+		/* Clamp the end to the 4GB segment that contains the start. */
+		*end = min(*end, ALIGN(*start, PFN_4G) - 1);
+	}
+}
+
+/*
+ * Allocate and initialise a new MMU context for @pfdev.
+ *
+ * The context starts with no address space assigned (as = -1), a drm_mm
+ * allocator whose range is expressed in pages, and ARM_MALI_LPAE page-table
+ * ops configured from the device's mmu_features.
+ *
+ * Returns the new context with its refcount initialised, ERR_PTR(-ENOMEM)
+ * when the context cannot be allocated, or ERR_PTR(-EINVAL) when the
+ * page-table ops cannot be created.
+ */
+struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev)
+{
+	struct panfrost_mmu *mmu;
+
+	mmu = kzalloc(sizeof(*mmu), GFP_KERNEL);
+	if (!mmu)
+		return ERR_PTR(-ENOMEM);
+
+	mmu->pfdev = pfdev;
+	spin_lock_init(&mmu->mm_lock);
+
+	/* 4G enough for now. can be 48-bit */
+	drm_mm_init(&mmu->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT);
+	mmu->mm.color_adjust = panfrost_drm_mm_color_adjust;
+
+	INIT_LIST_HEAD(&mmu->list);
+	mmu->as = -1;
+
+	/* ias/oas come from the low and second byte of mmu_features. */
+	mmu->pgtbl_cfg = (struct io_pgtable_cfg) {
+		.pgsize_bitmap	= SZ_4K | SZ_2M,
+		.ias		= FIELD_GET(0xff, pfdev->features.mmu_features),
+		.oas		= FIELD_GET(0xff00, pfdev->features.mmu_features),
+		.coherent_walk	= pfdev->coherent,
+		.tlb		= &mmu_tlb_ops,
+		.iommu_dev	= pfdev->dev,
+	};
+
+	/* The context itself is passed as the cookie for the TLB callbacks. */
+	mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg,
+					      mmu);
+	if (!mmu->pgtbl_ops) {
+		kfree(mmu);
+		return ERR_PTR(-EINVAL);
+	}
+
+	kref_init(&mmu->refcount);
+
+	return mmu;
+}
+
+/*
+ * Translate the access-type field of @fault_status into a printable name.
+ *
+ * The ATOMIC encoding is reported as "ATOMIC" only when the hardware has
+ * HW_FEATURE_AARCH64_MMU, and as "UNKNOWN" otherwise. An unrecognised
+ * encoding triggers a warning and yields NULL.
+ */
+static const char *access_type_name(struct panfrost_device *pfdev,
+		u32 fault_status)
+{
+	u32 type = fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK;
+
+	if (type == AS_FAULTSTATUS_ACCESS_TYPE_READ)
+		return "READ";
+
+	if (type == AS_FAULTSTATUS_ACCESS_TYPE_WRITE)
+		return "WRITE";
+
+	if (type == AS_FAULTSTATUS_ACCESS_TYPE_EX)
+		return "EXECUTE";
+
+	if (type == AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC)
+		return panfrost_has_hw_feature(pfdev, HW_FEATURE_AARCH64_MMU) ?
+		       "ATOMIC" : "UNKNOWN";
+
+	WARN_ON(1);
+	return NULL;
+}
+
+/*
+ * Hard IRQ half of the MMU interrupt.
+ *
+ * Returns IRQ_NONE when MMU_INT_STAT reads zero (the line is requested as
+ * shared). Otherwise all MMU interrupts are masked and the threaded handler
+ * is woken; the thread writes MMU_INT_MASK again when it is done.
+ */
+static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data)
+{
+	struct panfrost_device *pfdev = data;
+
+	if (!mmu_read(pfdev, MMU_INT_STAT))
+		return IRQ_NONE;
+
+	mmu_write(pfdev, MMU_INT_MASK, 0);
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Threaded half of the MMU interrupt.
+ *
+ * Processes the pending address spaces one at a time, lowest index first.
+ * For each one the fault status and address are read and the interrupt bits
+ * of that AS are cleared. If only the low (BIT(as)) interrupt bit is set
+ * and the exception type lies in 0xC0-0xC7, the fault is handed to
+ * panfrost_mmu_map_fault_addr(). Any fault that is not handled that way is
+ * logged, the AS is added to as_faulty_mask and its MMU is disabled.
+ *
+ * Once nothing is left pending, MMU_INT_MASK is rewritten so that every AS
+ * except the faulty ones can raise interrupts again.
+ */
+static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
+{
+	struct panfrost_device *pfdev = data;
+	u32 status = mmu_read(pfdev, MMU_INT_RAWSTAT);
+	int ret;
+
+	while (status) {
+		/* Each AS owns two status bits: BIT(as) and BIT(as + 16). */
+		u32 as = ffs(status | (status >> 16)) - 1;
+		u32 mask = BIT(as) | BIT(as + 16);
+		u64 addr;
+		u32 fault_status;
+		u32 exception_type;
+		u32 access_type;
+		u32 source_id;
+
+		fault_status = mmu_read(pfdev, AS_FAULTSTATUS(as));
+		addr = mmu_read(pfdev, AS_FAULTADDRESS_LO(as));
+		addr |= (u64)mmu_read(pfdev, AS_FAULTADDRESS_HI(as)) << 32;
+
+		/* decode the fault status */
+		exception_type = fault_status & 0xFF;
+		access_type = (fault_status >> 8) & 0x3;
+		source_id = (fault_status >> 16);
+
+		mmu_write(pfdev, MMU_INT_CLEAR, mask);
+
+		/* Page fault only */
+		ret = -1;
+		if ((status & mask) == BIT(as) && (exception_type & 0xF8) == 0xC0)
+			ret = panfrost_mmu_map_fault_addr(pfdev, as, addr);
+
+		if (ret) {
+			/* terminal fault, print info about the fault */
+			dev_err(pfdev->dev,
+				"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+				"Reason: %s\n"
+				"raw fault status: 0x%X\n"
+				"decoded fault status: %s\n"
+				"exception type 0x%X: %s\n"
+				"access type 0x%X: %s\n"
+				"source id 0x%X\n",
+				as, addr,
+				"TODO",
+				fault_status,
+				(fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+				exception_type, panfrost_exception_name(exception_type),
+				access_type, access_type_name(pfdev, fault_status),
+				source_id);
+
+			spin_lock(&pfdev->as_lock);
+			/* Ignore MMU interrupts on this AS until it's been
+			 * re-enabled.
+			 */
+			pfdev->as_faulty_mask |= mask;
+
+			/* Disable the MMU to kill jobs on this AS. */
+			panfrost_mmu_disable(pfdev, as);
+			spin_unlock(&pfdev->as_lock);
+		}
+
+		status &= ~mask;
+
+		/* If we received new MMU interrupts, process them before returning. */
+		if (!status)
+			status = mmu_read(pfdev, MMU_INT_RAWSTAT) & ~pfdev->as_faulty_mask;
+	}
+
+	spin_lock(&pfdev->as_lock);
+	mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask);
+	spin_unlock(&pfdev->as_lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up the "mmu" interrupt of the platform device and install the hard
+ * and threaded MMU interrupt handlers on it (device-managed, shared line).
+ *
+ * Returns 0 on success, the negative errno reported by the interrupt lookup
+ * (so that codes such as -EPROBE_DEFER reach the caller unchanged), -ENODEV
+ * if the lookup yields IRQ 0, or the error from
+ * devm_request_threaded_irq().
+ */
+int panfrost_mmu_init(struct panfrost_device *pfdev)
+{
+	int err, irq;
+
+	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu");
+	if (irq < 0)
+		return irq;
+	/* Not expected from the lookup, but never request IRQ 0. */
+	if (!irq)
+		return -ENODEV;
+
+	err = devm_request_threaded_irq(pfdev->dev, irq,
+					panfrost_mmu_irq_handler,
+					panfrost_mmu_irq_handler_thread,
+					IRQF_SHARED, KBUILD_MODNAME "-mmu",
+					pfdev);
+
+	if (err) {
+		dev_err(pfdev->dev, "failed to request mmu irq");
+		return err;
+	}
+
+	return 0;
+}
+
+/* Mask every MMU interrupt by writing zero to MMU_INT_MASK. */
+void panfrost_mmu_fini(struct panfrost_device *pfdev)
+{
+	mmu_write(pfdev, MMU_INT_MASK, 0);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h
new file mode 100644
index 0000000000..cc2a0d307f
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+
+#ifndef __PANFROST_MMU_H__
+#define __PANFROST_MMU_H__
+
+struct panfrost_gem_mapping;
+struct panfrost_file_priv;
+struct panfrost_mmu;
+
+int panfrost_mmu_map(struct panfrost_gem_mapping *mapping);
+void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping);
+
+int panfrost_mmu_init(struct panfrost_device *pfdev);
+void panfrost_mmu_fini(struct panfrost_device *pfdev);
+void panfrost_mmu_reset(struct panfrost_device *pfdev);
+
+u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu);
+void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu);
+
+struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu);
+void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu);
+struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
new file mode 100644
index 0000000000..ba9b6e2b26
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Collabora Ltd */
+
+#include <linux/completion.h>
+#include <linux/iopoll.h>
+#include <linux/iosys-map.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_file.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/panfrost_drm.h>
+
+#include "panfrost_device.h"
+#include "panfrost_features.h"
+#include "panfrost_gem.h"
+#include "panfrost_issues.h"
+#include "panfrost_job.h"
+#include "panfrost_mmu.h"
+#include "panfrost_perfcnt.h"
+#include "panfrost_regs.h"
+
+#define COUNTERS_PER_BLOCK		64
+#define BYTES_PER_COUNTER		4
+#define BLOCKS_PER_COREGROUP		8
+#define V4_SHADERS_PER_COREGROUP	4
+
+/* Per-device performance counter state, reachable through pfdev->perfcnt. */
+struct panfrost_perfcnt {
+	/* GPU mapping of the counter buffer; set on enable, NULL after disable. */
+	struct panfrost_gem_mapping *mapping;
+	/* Counter buffer size in bytes, computed in panfrost_perfcnt_init(). */
+	size_t bosize;
+	/* Kernel virtual address of the vmapped counter buffer. */
+	void *buf;
+	/* File that currently owns the counters, or NULL when unused. */
+	struct panfrost_file_priv *user;
+	/* Taken by the perfcnt ioctls and by panfrost_perfcnt_close(). */
+	struct mutex lock;
+	/* Completed by panfrost_perfcnt_clean_cache_done(). */
+	struct completion dump_comp;
+};
+
+/*
+ * Signal dump_comp, waking up whoever is waiting for a perfcnt dump or
+ * enable sequence to finish.
+ */
+void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev)
+{
+	complete(&pfdev->perfcnt->dump_comp);
+}
+
+/* Issue a GPU cache clean command (GPU_CMD_CLEAN_CACHES). */
+void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
+{
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
+}
+
+/*
+ * Ask the GPU to sample its performance counters into the perfcnt buffer
+ * and wait (interruptibly, up to one second) for dump_comp to be signalled.
+ *
+ * Returns 0 when the completion fired, -ETIMEDOUT when it did not fire in
+ * time, or the negative value reported by the interrupted wait.
+ */
+static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
+{
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+	u64 gpuva;
+	int ret;
+
+	reinit_completion(&perfcnt->dump_comp);
+
+	/* Point the hardware at the GPU address of the counter buffer. */
+	gpuva = perfcnt->mapping->mmnode.start << PAGE_SHIFT;
+	gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva));
+	gpu_write(pfdev, GPU_PERFCNT_BASE_HI, upper_32_bits(gpuva));
+
+	/* Clear stale completion interrupts before starting the sample. */
+	gpu_write(pfdev, GPU_INT_CLEAR,
+		  GPU_IRQ_CLEAN_CACHES_COMPLETED |
+		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE);
+
+	ret = wait_for_completion_interruptible_timeout(&perfcnt->dump_comp,
+							msecs_to_jiffies(1000));
+	if (ret < 0)
+		return ret;
+
+	return ret ? 0 : -ETIMEDOUT;
+}
+
+/*
+ * Enable performance counters on behalf of @file_priv.
+ *
+ * Returns 0 immediately if this file already owns the counters and -EBUSY
+ * if another file does. Otherwise a runtime-PM reference is taken, a
+ * counter BO of perfcnt->bosize bytes is created, mapped into the file's
+ * GPU address space and vmapped for the kernel. The counters are cleared
+ * and the caches cleaned and invalidated, and the hardware is then
+ * configured in manual mode with every counter block enabled.
+ *
+ * On success the runtime-PM reference and the address space reference taken
+ * through panfrost_mmu_as_get() are kept; they are dropped again in
+ * panfrost_perfcnt_disable_locked(). On failure, everything acquired so far
+ * is released in reverse order and a negative errno is returned.
+ */
+static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
+					  struct drm_file *file_priv,
+					  unsigned int counterset)
+{
+	struct panfrost_file_priv *user = file_priv->driver_priv;
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+	struct iosys_map map;
+	struct drm_gem_shmem_object *bo;
+	u32 cfg, as;
+	int ret;
+
+	if (user == perfcnt->user)
+		return 0;
+	else if (perfcnt->user)
+		return -EBUSY;
+
+	/* The error path still drops the reference through err_put_pm. */
+	ret = pm_runtime_get_sync(pfdev->dev);
+	if (ret < 0)
+		goto err_put_pm;
+
+	bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize);
+	if (IS_ERR(bo)) {
+		ret = PTR_ERR(bo);
+		goto err_put_pm;
+	}
+
+	/* Map the perfcnt buf in the address space attached to file_priv. */
+	ret = panfrost_gem_open(&bo->base, file_priv);
+	if (ret)
+		goto err_put_bo;
+
+	perfcnt->mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base),
+						    user);
+	if (!perfcnt->mapping) {
+		ret = -EINVAL;
+		goto err_close_bo;
+	}
+
+	ret = drm_gem_vmap_unlocked(&bo->base, &map);
+	if (ret)
+		goto err_put_mapping;
+	perfcnt->buf = map.vaddr;
+
+	/*
+	 * Invalidate the cache and clear the counters to start from a fresh
+	 * state.
+	 */
+	reinit_completion(&pfdev->perfcnt->dump_comp);
+	gpu_write(pfdev, GPU_INT_CLEAR,
+		  GPU_IRQ_CLEAN_CACHES_COMPLETED |
+		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR);
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES);
+	ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp,
+					  msecs_to_jiffies(1000));
+	if (!ret) {
+		ret = -ETIMEDOUT;
+		goto err_vunmap;
+	}
+
+	/* From here on this file owns the counters. */
+	perfcnt->user = user;
+
+	as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu);
+	cfg = GPU_PERFCNT_CFG_AS(as) |
+	      GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);
+
+	/*
+	 * Bifrost GPUs have 2 set of counters, but we're only interested by
+	 * the first one for now.
+	 */
+	if (panfrost_model_is_bifrost(pfdev))
+		cfg |= GPU_PERFCNT_CFG_SETSEL(counterset);
+
+	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff);
+	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff);
+	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff);
+
+	/*
+	 * Due to PRLAM-8186 we need to disable the Tiler before we enable HW
+	 * counters.
+	 */
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
+		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
+	else
+		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
+
+	gpu_write(pfdev, GPU_PERFCNT_CFG, cfg);
+
+	/* With the issue present, the Tiler is only enabled after the config write. */
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
+		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
+
+	/* The BO ref is retained by the mapping. */
+	drm_gem_object_put(&bo->base);
+
+	return 0;
+
+err_vunmap:
+	drm_gem_vunmap_unlocked(&bo->base, &map);
+err_put_mapping:
+	panfrost_gem_mapping_put(perfcnt->mapping);
+err_close_bo:
+	panfrost_gem_close(&bo->base, file_priv);
+err_put_bo:
+	drm_gem_object_put(&bo->base);
+err_put_pm:
+	pm_runtime_put(pfdev->dev);
+	return ret;
+}
+
+/*
+ * Disable performance counters previously enabled by @file_priv.
+ *
+ * Returns -EINVAL if this file does not own the counters. Otherwise all
+ * counter blocks are switched off, the counter buffer is unmapped and
+ * closed, the address space and mapping references are dropped and the
+ * runtime-PM reference is released; returns 0.
+ */
+static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
+					   struct drm_file *file_priv)
+{
+	struct panfrost_file_priv *user = file_priv->driver_priv;
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+	/* Rebuild the iosys_map from the saved kernel address for vunmap. */
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(perfcnt->buf);
+
+	if (user != perfcnt->user)
+		return -EINVAL;
+
+	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0);
+	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0);
+	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0);
+	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
+	gpu_write(pfdev, GPU_PERFCNT_CFG,
+		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
+
+	perfcnt->user = NULL;
+	drm_gem_vunmap_unlocked(&perfcnt->mapping->obj->base.base, &map);
+	perfcnt->buf = NULL;
+	panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv);
+	panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
+	panfrost_gem_mapping_put(perfcnt->mapping);
+	perfcnt->mapping = NULL;
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+
+	return 0;
+}
+
+/*
+ * PERFCNT_ENABLE ioctl: enable or disable performance counters for the
+ * calling file, depending on req->enable.
+ *
+ * Returns the error from panfrost_unstable_ioctl_check() if that check
+ * fails, -EINVAL for a counter set the GPU does not have, or otherwise the
+ * result of the enable/disable helper, which runs under perfcnt->lock.
+ */
+int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+	struct drm_panfrost_perfcnt_enable *req = data;
+	unsigned int max_counterset;
+	int ret;
+
+	ret = panfrost_unstable_ioctl_check();
+	if (ret)
+		return ret;
+
+	/* Only Bifrost GPUs have 2 set of counters. */
+	max_counterset = panfrost_model_is_bifrost(pfdev) ? 1 : 0;
+	if (req->counterset > max_counterset)
+		return -EINVAL;
+
+	mutex_lock(&perfcnt->lock);
+	if (!req->enable)
+		ret = panfrost_perfcnt_disable_locked(pfdev, file_priv);
+	else
+		ret = panfrost_perfcnt_enable_locked(pfdev, file_priv,
+						     req->counterset);
+	mutex_unlock(&perfcnt->lock);
+
+	return ret;
+}
+
+/*
+ * PERFCNT_DUMP ioctl: sample the counters and copy the whole counter buffer
+ * (perfcnt->bosize bytes) to the user pointer in req->buf_ptr.
+ *
+ * Returns the error from panfrost_unstable_ioctl_check() if that check
+ * fails, -EINVAL when the calling file does not own the counters, the error
+ * from the dump helper, -EFAULT when the copy to user space fails, or 0 on
+ * success.
+ */
+int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
+				struct drm_file *file_priv)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+	struct drm_panfrost_perfcnt_dump *req = data;
+	void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr;
+	int ret;
+
+	ret = panfrost_unstable_ioctl_check();
+	if (ret)
+		return ret;
+
+	mutex_lock(&perfcnt->lock);
+	/* Only the file that enabled the counters may dump them. */
+	if (perfcnt->user != file_priv->driver_priv) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = panfrost_perfcnt_dump_locked(pfdev);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize))
+		ret = -EFAULT;
+
+out:
+	mutex_unlock(&perfcnt->lock);
+
+	return ret;
+}
+
+/*
+ * File-close hook for perfcnt: if the closing file owns the counters,
+ * disable them. A runtime-PM reference is held around the operation; the
+ * result of pm_runtime_get_sync() is not checked.
+ */
+void panfrost_perfcnt_close(struct drm_file *file_priv)
+{
+	struct panfrost_file_priv *pfile = file_priv->driver_priv;
+	struct panfrost_device *pfdev = pfile->pfdev;
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+
+	pm_runtime_get_sync(pfdev->dev);
+	mutex_lock(&perfcnt->lock);
+	if (perfcnt->user == pfile)
+		panfrost_perfcnt_disable_locked(pfdev, file_priv);
+	mutex_unlock(&perfcnt->lock);
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+}
+
+/*
+ * Allocate the per-device perfcnt state, compute the counter buffer size
+ * for this GPU and make sure the counters start out disabled.
+ *
+ * The buffer size is a number of blocks times COUNTERS_PER_BLOCK times
+ * BYTES_PER_COUNTER; how the block count is derived depends on
+ * HW_FEATURE_V4.
+ *
+ * Returns 0 on success or -ENOMEM if the state cannot be allocated.
+ */
+int panfrost_perfcnt_init(struct panfrost_device *pfdev)
+{
+	struct panfrost_perfcnt *perfcnt;
+	size_t size;
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) {
+		unsigned int ncoregroups;
+
+		/* One core group per bit set in l2_present. */
+		ncoregroups = hweight64(pfdev->features.l2_present);
+		size = ncoregroups * BLOCKS_PER_COREGROUP *
+		       COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
+	} else {
+		unsigned int nl2c, ncores;
+
+		/*
+		 * TODO: define a macro to extract the number of l2 caches from
+		 * mem_features.
+		 */
+		nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1;
+
+		/*
+		 * shader_present might be sparse, but the counters layout
+		 * forces to dump unused regions too, hence the fls64() call
+		 * instead of hweight64().
+		 */
+		ncores = fls64(pfdev->features.shader_present);
+
+		/*
+		 * There's always one JM and one Tiler block, hence the '+ 2'
+		 * here.
+		 */
+		size = (nl2c + ncores + 2) *
+		       COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
+	}
+
+	/* Device-managed allocation: no explicit free in the fini path. */
+	perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL);
+	if (!perfcnt)
+		return -ENOMEM;
+
+	perfcnt->bosize = size;
+
+	/* Start with everything disabled. */
+	gpu_write(pfdev, GPU_PERFCNT_CFG,
+		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
+	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
+
+	init_completion(&perfcnt->dump_comp);
+	mutex_init(&perfcnt->lock);
+	pfdev->perfcnt = perfcnt;
+
+	return 0;
+}
+
+/*
+ * Switch the performance counters off: set the configuration mode to OFF
+ * and clear the enable masks of all four counter blocks.
+ */
+void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
+{
+	/* Disable everything before leaving. */
+	gpu_write(pfdev, GPU_PERFCNT_CFG,
+		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
+	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
new file mode 100644
index 0000000000..8bbcf5f5fb
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Collabora Ltd */
+#ifndef __PANFROST_PERFCNT_H__
+#define __PANFROST_PERFCNT_H__
+
+#include "panfrost_device.h"
+/* Performance counter (perfcnt) entry points of the panfrost driver. */
+void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev);
+void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev);
+int panfrost_perfcnt_init(struct panfrost_device *pfdev);	/* 0 on success, -ENOMEM if the state allocation fails */
+void panfrost_perfcnt_fini(struct panfrost_device *pfdev);	/* sets counter mode OFF and zeroes the enable registers */
+void panfrost_perfcnt_close(struct drm_file *file_priv);
+int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv);
+int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+
+#endif /* __PANFROST_PERFCNT_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
new file mode 100644
index 0000000000..919f44ac85
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/*
+ * Register definitions based on mali_midg_regmap.h
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ */
+#ifndef __PANFROST_REGS_H__
+#define __PANFROST_REGS_H__
+
+#define GPU_ID				0x00
+#define GPU_L2_FEATURES			0x004	/* (RO) Level 2 cache features */
+#define GPU_CORE_FEATURES		0x008	/* (RO) Shader Core Features */
+#define GPU_TILER_FEATURES		0x00C	/* (RO) Tiler Features */
+#define GPU_MEM_FEATURES		0x010	/* (RO) Memory system features */
+#define   GROUPS_L2_COHERENT		BIT(0)	/* Core groups are L2 coherent */
+
+#define GPU_MMU_FEATURES		0x014	/* (RO) MMU features */
+#define GPU_AS_PRESENT			0x018	/* (RO) Address space slots present */
+#define GPU_JS_PRESENT			0x01C	/* (RO) Job slots present */
+/* GPU interrupt registers, the GPU_IRQ_* bit positions and the masks OR-ing them together. */
+#define GPU_INT_RAWSTAT			0x20
+#define GPU_INT_CLEAR			0x24
+#define GPU_INT_MASK			0x28
+#define GPU_INT_STAT			0x2c
+#define   GPU_IRQ_FAULT			BIT(0)
+#define   GPU_IRQ_MULTIPLE_FAULT	BIT(7)
+#define   GPU_IRQ_RESET_COMPLETED	BIT(8)
+#define   GPU_IRQ_POWER_CHANGED		BIT(9)
+#define   GPU_IRQ_POWER_CHANGED_ALL	BIT(10)
+#define   GPU_IRQ_PERFCNT_SAMPLE_COMPLETED BIT(16)
+#define   GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17)
+#define   GPU_IRQ_MASK_ALL			 \
+	  (GPU_IRQ_FAULT			|\
+	   GPU_IRQ_MULTIPLE_FAULT		|\
+	   GPU_IRQ_RESET_COMPLETED		|\
+	   GPU_IRQ_POWER_CHANGED		|\
+	   GPU_IRQ_POWER_CHANGED_ALL		|\
+	   GPU_IRQ_PERFCNT_SAMPLE_COMPLETED	|\
+	   GPU_IRQ_CLEAN_CACHES_COMPLETED)
+#define GPU_IRQ_MASK_ERROR	   		\
+	(					\
+	 GPU_IRQ_FAULT				|\
+	 GPU_IRQ_MULTIPLE_FAULT)
+#define GPU_CMD				0x30
+#define   GPU_CMD_SOFT_RESET		0x01
+#define   GPU_CMD_PERFCNT_CLEAR		0x03
+#define   GPU_CMD_PERFCNT_SAMPLE	0x04
+#define   GPU_CMD_CLEAN_CACHES		0x07
+#define   GPU_CMD_CLEAN_INV_CACHES	0x08
+#define GPU_STATUS			0x34
+#define   GPU_STATUS_PRFCNT_ACTIVE	BIT(2)
+#define GPU_LATEST_FLUSH_ID		0x38
+#define GPU_PWR_KEY			0x50	/* (WO) Power manager key register */
+#define  GPU_PWR_KEY_UNLOCK		0x2968A819
+#define GPU_PWR_OVERRIDE0		0x54	/* (RW) Power manager override settings */
+#define GPU_PWR_OVERRIDE1		0x58	/* (RW) Power manager override settings */
+#define GPU_FAULT_STATUS		0x3C
+#define GPU_FAULT_ADDRESS_LO		0x40
+#define GPU_FAULT_ADDRESS_HI		0x44
+
+#define GPU_PERFCNT_BASE_LO		0x60
+#define GPU_PERFCNT_BASE_HI		0x64
+#define GPU_PERFCNT_CFG			0x68
+#define   GPU_PERFCNT_CFG_MODE(x)	(x)	/* mode value is used unshifted */
+#define   GPU_PERFCNT_CFG_MODE_OFF	0	/* programmed by panfrost_perfcnt_init()/_fini() */
+#define   GPU_PERFCNT_CFG_MODE_MANUAL	1
+#define   GPU_PERFCNT_CFG_MODE_TILE	2
+#define   GPU_PERFCNT_CFG_AS(x)		((x) << 4)	/* field starts at bit 4 */
+#define   GPU_PERFCNT_CFG_SETSEL(x)	((x) << 8)	/* field starts at bit 8 */
+#define GPU_PRFCNT_JM_EN		0x6c	/* the four *_EN registers are zeroed by panfrost_perfcnt_init()/_fini() */
+#define GPU_PRFCNT_SHADER_EN		0x70
+#define GPU_PRFCNT_TILER_EN		0x74
+#define GPU_PRFCNT_MMU_L2_EN		0x7c
+
+#define GPU_THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
+#define GPU_THREAD_MAX_WORKGROUP_SIZE	0x0A4	/* (RO) Maximum workgroup size */
+#define GPU_THREAD_MAX_BARRIER_SIZE	0x0A8	/* (RO) Maximum threads waiting at a barrier */
+#define GPU_THREAD_FEATURES		0x0AC	/* (RO) Thread features */
+#define GPU_THREAD_TLS_ALLOC		0x310   /* (RO) Number of threads per core that
+						 * TLS must be allocated for */
+
+#define GPU_TEXTURE_FEATURES(n)		(0x0B0 + ((n) * 4))	/* register n; consecutive registers are 4 bytes apart */
+#define GPU_JS_FEATURES(n)		(0x0C0 + ((n) * 4))	/* register n; consecutive registers are 4 bytes apart */
+#define GPU_AFBC_FEATURES		(0x4C)	/* (RO) AFBC support on Bifrost */
+
+#define GPU_SHADER_PRESENT_LO		0x100	/* (RO) Shader core present bitmap, low word */
+#define GPU_SHADER_PRESENT_HI		0x104	/* (RO) Shader core present bitmap, high word */
+#define GPU_TILER_PRESENT_LO		0x110	/* (RO) Tiler core present bitmap, low word */
+#define GPU_TILER_PRESENT_HI		0x114	/* (RO) Tiler core present bitmap, high word */
+
+#define GPU_L2_PRESENT_LO		0x120	/* (RO) Level 2 cache present bitmap, low word */
+#define GPU_L2_PRESENT_HI		0x124	/* (RO) Level 2 cache present bitmap, high word */
+
+#define GPU_COHERENCY_FEATURES		0x300	/* (RO) Coherency features present */
+#define   COHERENCY_ACE_LITE		BIT(0)
+#define   COHERENCY_ACE			BIT(1)
+
+#define GPU_STACK_PRESENT_LO		0xE00   /* (RO) Core stack present bitmap, low word */
+#define GPU_STACK_PRESENT_HI		0xE04   /* (RO) Core stack present bitmap, high word */
+
+#define SHADER_READY_LO			0x140	/* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI			0x144	/* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO			0x150	/* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI			0x154	/* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO			0x160	/* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI			0x164	/* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO			0xE10   /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI			0xE14   /* (RO) Core stack ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO			0x180	/* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI			0x184	/* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO			0x190	/* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI			0x194	/* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO			0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI			0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO			0xE20   /* (RO) Core stack power on bitmap, low word; NOTE(review): SHADER/TILER/L2 PWRON are (WO) - confirm access type */
+#define STACK_PWRON_HI			0xE24   /* (RO) Core stack power on bitmap, high word; NOTE(review): see STACK_PWRON_LO */
+
+
+#define SHADER_PWROFF_LO		0x1C0	/* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI		0x1C4	/* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO			0x1D0	/* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI			0x1D4	/* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO			0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI			0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO			0xE30   /* (RO) Core stack power off bitmap, low word; NOTE(review): SHADER/TILER/L2 PWROFF are (WO) - confirm access type */
+#define STACK_PWROFF_HI			0xE34   /* (RO) Core stack power off bitmap, high word; NOTE(review): see STACK_PWROFF_LO */
+
+
+#define SHADER_PWRTRANS_LO		0x200	/* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI		0x204	/* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO		0x210	/* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI		0x214	/* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO			0x220	/* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI			0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+
+#define STACK_PWRTRANS_LO		0xE40   /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI		0xE44   /* (RO) Core stack power transition bitmap, high word */
+
+
+#define SHADER_PWRACTIVE_LO		0x240	/* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI		0x244	/* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO		0x250	/* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI		0x254	/* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO			0x260	/* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI			0x264	/* (RO) Level 2 cache active bitmap, high word */
+
+#define GPU_JM_CONFIG			0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
+#define GPU_SHADER_CONFIG		0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
+#define GPU_TILER_CONFIG		0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define GPU_L2_MMU_CONFIG		0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+/* L2_MMU_CONFIG register: every field mask below is built from its own *_SHIFT */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT	23
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY		(0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT	24
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS		(0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT	(0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER	(0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF		(0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT	26
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES		(0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT	(0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER	(0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF	(0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* 3-bit variants of the limit fields: masks must use the 3BIT shifts (bits 14:12 and 17:15). */
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT	12
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS		(0x7 << L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT	15
+#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES	(0x7 << L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT)
+
+/* SHADER_CONFIG register */
+#define SC_ALT_COUNTERS			BIT(3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL	BIT(4)
+#define SC_SDC_DISABLE_OQ_DISCARD	BIT(6)
+#define SC_LS_ALLOW_ATTR_TYPES		BIT(16)
+#define SC_LS_PAUSEBUFFER_DISABLE	BIT(16)	/* same bit as SC_LS_ALLOW_ATTR_TYPES; presumably GPU-model dependent - TODO confirm */
+#define SC_TLS_HASH_ENABLE		BIT(17)
+#define SC_LS_ATTR_CHECK_DISABLE	BIT(18)
+#define SC_ENABLE_TEXGRD_FLAGS		BIT(25)
+#define SC_VAR_ALGORITHM		BIT(29)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+#define TC_CLOCK_GATE_OVERRIDE		BIT(0)
+
+/* JM_CONFIG register */
+#define JM_TIMESTAMP_OVERRIDE		BIT(0)
+#define JM_CLOCK_GATE_OVERRIDE		BIT(1)
+#define JM_JOB_THROTTLE_ENABLE		BIT(2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT	3
+#define JM_MAX_JOB_THROTTLE_LIMIT	0x3F
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT 2	/* NOTE(review): same position as JM_JOB_THROTTLE_ENABLE (bit 2); presumably GPU-model dependent - confirm */
+#define JM_IDVS_GROUP_SIZE_SHIFT	16
+#define JM_DEFAULT_IDVS_GROUP_SIZE	0xF
+#define JM_MAX_IDVS_GROUP_SIZE		0x3F
+
+
+/* Job Control regs */
+#define JOB_INT_RAWSTAT			0x1000
+#define JOB_INT_CLEAR			0x1004
+#define JOB_INT_MASK			0x1008
+#define JOB_INT_STAT			0x100c
+#define JOB_INT_JS_STATE		0x1010
+#define JOB_INT_THROTTLE		0x1014
+/* Per-slot interrupt bits; unsigned so that shifting into bit 31 is well defined. */
+#define MK_JS_MASK(j)			(0x10001u << (j))	/* DONE and ERR bits of slot j together */
+#define JOB_INT_MASK_ERR(j)		BIT((j) + 16)	/* ERR bit of slot j (bit j + 16) */
+#define JOB_INT_MASK_DONE(j)		BIT(j)	/* DONE bit of slot j (bit j) */
+
+#define JS_BASE				0x1800
+#define JS_SLOT_STRIDE			0x80	/* slot n's registers start at JS_BASE + n * JS_SLOT_STRIDE */
+
+#define JS_HEAD_LO(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x00)
+#define JS_HEAD_HI(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x04)
+#define JS_TAIL_LO(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x08)
+#define JS_TAIL_HI(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x0c)
+#define JS_AFFINITY_LO(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x10)
+#define JS_AFFINITY_HI(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x14)
+#define JS_CONFIG(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x18)
+#define JS_XAFFINITY(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x1c)
+#define JS_COMMAND(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x20)
+#define JS_STATUS(n)			(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x24)
+#define JS_HEAD_NEXT_LO(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x40)
+#define JS_HEAD_NEXT_HI(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x44)
+#define JS_AFFINITY_NEXT_LO(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x50)
+#define JS_AFFINITY_NEXT_HI(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x54)
+#define JS_CONFIG_NEXT(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x58)
+#define JS_COMMAND_NEXT(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x60)
+#define JS_FLUSH_ID_NEXT(n)		(JS_BASE + ((n) * JS_SLOT_STRIDE) + 0x70)
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_CLEAN		BIT(8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE	(3u << 8)
+#define JS_CONFIG_START_MMU			BIT(10)
+#define JS_CONFIG_JOB_CHAIN_FLAG		BIT(11)
+#define JS_CONFIG_END_FLUSH_CLEAN		BIT(12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE	(3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION	BIT(14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK	BIT(15)
+#define JS_CONFIG_THREAD_PRI(n)			((n) << 16)	/* field starts at bit 16 */
+
+#define JS_COMMAND_NOP			0x00
+#define JS_COMMAND_START		0x01
+#define JS_COMMAND_SOFT_STOP		0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP		0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0		0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0		0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1		0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1		0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+/* MMU regs */
+#define MMU_INT_RAWSTAT			0x2000
+#define MMU_INT_CLEAR			0x2004
+#define MMU_INT_MASK			0x2008
+#define MMU_INT_STAT			0x200c
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP			0x00	/* NOP Operation */
+#define AS_COMMAND_UPDATE		0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK			0x02	/* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK		0x03	/* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH		0x04	/* Flush all L2 caches then issue a flush region command to all MMUs
+						   (deprecated - only for use with T60x); same value as AS_COMMAND_FLUSH_PT */
+#define AS_COMMAND_FLUSH_PT		0x04	/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM		0x05	/* Wait for memory accesses to complete, flush all the L1 caches, then
+						   flush all L2 caches, then issue a flush region command to all MMUs */
+
+#define MMU_BASE			0x2400
+#define MMU_AS_SHIFT			0x06	/* log2 of the per-address-space register window, see MMU_AS_STRIDE */
+#define MMU_AS(as)			(MMU_BASE + ((as) << MMU_AS_SHIFT))
+
+#define AS_TRANSTAB_LO(as)		(MMU_AS(as) + 0x00) /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI(as)		(MMU_AS(as) + 0x04) /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO(as)		(MMU_AS(as) + 0x08) /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI(as)		(MMU_AS(as) + 0x0C) /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO(as)		(MMU_AS(as) + 0x10) /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI(as)		(MMU_AS(as) + 0x14) /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND(as)			(MMU_AS(as) + 0x18) /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS(as)		(MMU_AS(as) + 0x1C) /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO(as)		(MMU_AS(as) + 0x20) /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI(as)		(MMU_AS(as) + 0x24) /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS(as)			(MMU_AS(as) + 0x28) /* (RO) Status flags for address space n */
+/* Additional Bifrost AS registers */
+#define AS_TRANSCFG_LO(as)		(MMU_AS(as) + 0x30) /* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_HI(as)		(MMU_AS(as) + 0x34) /* (RW) Translation table configuration for address space n, high word */
+#define AS_FAULTEXTRA_LO(as)		(MMU_AS(as) + 0x38) /* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_HI(as)		(MMU_AS(as) + 0x3C) /* (RO) Secondary fault address for address space n, high word */
+
+#define MMU_AS_STRIDE			(1 << MMU_AS_SHIFT)
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK	0xfffffffffffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY	0x2
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE		0x3
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK		0x3
+#define AS_TRANSTAB_LPAE_READ_INNER		BIT(2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER		BIT(4)
+
+#define AS_STATUS_AS_ACTIVE			0x01
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK		(0x3 << 8)	/* two-bit field at bits 9:8; values below */
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC	(0x0 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX		(0x1 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ		(0x2 << 8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE	(0x3 << 8)
+
+#define AS_LOCK_REGION_MIN_SIZE                 (1ULL << 15)	/* 32 KiB */
+
+#define gpu_write(dev, reg, data) writel((data), (dev)->iomem + (reg))	/* writel() data to dev->iomem + byte offset reg */
+#define gpu_read(dev, reg) readl((dev)->iomem + (reg))	/* readl() from dev->iomem + byte offset reg */
+
+#endif
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/gpu/drm/panfrost
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip