diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
commit | 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch) | |
tree | a94efe259b9009378be6d90eb30d2b019d95c194 /drivers/gpu/drm/panfrost | |
parent | Initial commit. (diff) | |
download | linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip |
Adding upstream version 5.10.209.upstream/5.10.209
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/panfrost')
22 files changed, 5215 insertions, 0 deletions
diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig new file mode 100644 index 000000000..77f4d32e5 --- /dev/null +++ b/drivers/gpu/drm/panfrost/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +config DRM_PANFROST + tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)" + depends on DRM + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + depends on MMU + select DRM_SCHED + select IOMMU_SUPPORT + select IOMMU_IO_PGTABLE_LPAE + select DRM_GEM_SHMEM_HELPER + select PM_DEVFREQ + select DEVFREQ_GOV_SIMPLE_ONDEMAND + help + DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and + Bifrost (G3x, G5x, G7x) GPUs. diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile new file mode 100644 index 000000000..b71935862 --- /dev/null +++ b/drivers/gpu/drm/panfrost/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +panfrost-y := \ + panfrost_drv.o \ + panfrost_device.o \ + panfrost_devfreq.o \ + panfrost_gem.o \ + panfrost_gem_shrinker.o \ + panfrost_gpu.o \ + panfrost_job.o \ + panfrost_mmu.o \ + panfrost_perfcnt.o + +obj-$(CONFIG_DRM_PANFROST) += panfrost.o diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO new file mode 100644 index 000000000..8c811a9e6 --- /dev/null +++ b/drivers/gpu/drm/panfrost/TODO @@ -0,0 +1,14 @@ +- Thermal support. + +- Bifrost support: + - DT bindings (Neil, WIP) + - MMU page table format and address space setup + - Bifrost specific feature and issue handling + - Coherent DMA support + +- Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu) + +- Compute job support. So called 'compute only' jobs need to be plumbed up to + userspace. + +- Support core dump on job failure diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c new file mode 100644 index 000000000..8ab025d00 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Collabora ltd. */ + +#include <linux/clk.h> +#include <linux/devfreq.h> +#include <linux/devfreq_cooling.h> +#include <linux/platform_device.h> +#include <linux/pm_opp.h> + +#include "panfrost_device.h" +#include "panfrost_devfreq.h" + +static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfreq) +{ + ktime_t now, last; + + now = ktime_get(); + last = pfdevfreq->time_last_update; + + if (pfdevfreq->busy_count > 0) + pfdevfreq->busy_time += ktime_sub(now, last); + else + pfdevfreq->idle_time += ktime_sub(now, last); + + pfdevfreq->time_last_update = now; +} + +static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct dev_pm_opp *opp; + int err; + + opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) + return PTR_ERR(opp); + dev_pm_opp_put(opp); + + err = dev_pm_opp_set_rate(dev, *freq); + if (err) + return err; + + return 0; +} + +static void panfrost_devfreq_reset(struct panfrost_devfreq *pfdevfreq) +{ + pfdevfreq->busy_time = 0; + pfdevfreq->idle_time = 0; + pfdevfreq->time_last_update = ktime_get(); +} + +static int panfrost_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *status) +{ + struct panfrost_device *pfdev = dev_get_drvdata(dev); + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + unsigned long irqflags; + + status->current_frequency = clk_get_rate(pfdev->clock); + + spin_lock_irqsave(&pfdevfreq->lock, irqflags); + + panfrost_devfreq_update_utilization(pfdevfreq); + + status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time, + pfdevfreq->idle_time)); + + status->busy_time = ktime_to_ns(pfdevfreq->busy_time); + + panfrost_devfreq_reset(pfdevfreq); + + spin_unlock_irqrestore(&pfdevfreq->lock, irqflags); + + dev_dbg(pfdev->dev, "busy %lu total %lu %lu %% freq %lu MHz\n", + status->busy_time, status->total_time, + status->busy_time / (status->total_time / 100), + status->current_frequency / 1000 / 1000); + + return 0; +} + +static struct devfreq_dev_profile panfrost_devfreq_profile = { + .polling_ms = 50, /* ~3 frames */ + .target = panfrost_devfreq_target, + .get_dev_status = panfrost_devfreq_get_dev_status, +}; + +int panfrost_devfreq_init(struct panfrost_device *pfdev) +{ + int ret; + struct dev_pm_opp *opp; + unsigned long cur_freq; + struct device *dev = &pfdev->pdev->dev; + struct devfreq *devfreq; + struct opp_table *opp_table; + struct thermal_cooling_device *cooling; + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + + opp_table = dev_pm_opp_set_regulators(dev, pfdev->comp->supply_names, + pfdev->comp->num_supplies); + if (IS_ERR(opp_table)) { + ret = PTR_ERR(opp_table); + /* Continue if the optional regulator is missing */ + if (ret != -ENODEV) { + DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); + goto err_fini; + } + } else { + pfdevfreq->regulators_opp_table = opp_table; + } + + ret = dev_pm_opp_of_add_table(dev); + if (ret) { + /* Optional, continue without devfreq */ + if (ret == -ENODEV) + ret = 0; + goto err_fini; + } + pfdevfreq->opp_of_table_added = true; + + spin_lock_init(&pfdevfreq->lock); + + panfrost_devfreq_reset(pfdevfreq); + + cur_freq = clk_get_rate(pfdev->clock); + + opp = devfreq_recommended_opp(dev, &cur_freq, 0); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + goto err_fini; + } + + panfrost_devfreq_profile.initial_freq = cur_freq; + dev_pm_opp_put(opp); + + devfreq = devm_devfreq_add_device(dev, &panfrost_devfreq_profile, + DEVFREQ_GOV_SIMPLE_ONDEMAND, NULL); + if (IS_ERR(devfreq)) { + DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n"); + ret = PTR_ERR(devfreq); + goto err_fini; + } + pfdevfreq->devfreq = devfreq; + + cooling = of_devfreq_cooling_register(dev->of_node, devfreq); + if (IS_ERR(cooling)) + DRM_DEV_INFO(dev, "Failed to register cooling device\n"); + else + pfdevfreq->cooling = cooling; + + return 0; + +err_fini: + panfrost_devfreq_fini(pfdev); + return ret; +} + +void panfrost_devfreq_fini(struct panfrost_device *pfdev) +{ + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + + if (pfdevfreq->cooling) { + devfreq_cooling_unregister(pfdevfreq->cooling); + pfdevfreq->cooling = NULL; + } + + if (pfdevfreq->opp_of_table_added) { + dev_pm_opp_of_remove_table(&pfdev->pdev->dev); + pfdevfreq->opp_of_table_added = false; + } + + if (pfdevfreq->regulators_opp_table) { + dev_pm_opp_put_regulators(pfdevfreq->regulators_opp_table); + pfdevfreq->regulators_opp_table = NULL; + } +} + +void panfrost_devfreq_resume(struct panfrost_device *pfdev) +{ + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + + if (!pfdevfreq->devfreq) + return; + + panfrost_devfreq_reset(pfdevfreq); + + devfreq_resume_device(pfdevfreq->devfreq); +} + +void panfrost_devfreq_suspend(struct panfrost_device *pfdev) +{ + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + + if (!pfdevfreq->devfreq) + return; + + devfreq_suspend_device(pfdevfreq->devfreq); +} + +void panfrost_devfreq_record_busy(struct panfrost_devfreq *pfdevfreq) +{ + unsigned long irqflags; + + if (!pfdevfreq->devfreq) + return; + + spin_lock_irqsave(&pfdevfreq->lock, irqflags); + + panfrost_devfreq_update_utilization(pfdevfreq); + + pfdevfreq->busy_count++; + + spin_unlock_irqrestore(&pfdevfreq->lock, irqflags); +} + +void panfrost_devfreq_record_idle(struct panfrost_devfreq *pfdevfreq) +{ + unsigned long irqflags; + + if (!pfdevfreq->devfreq) + return; + + spin_lock_irqsave(&pfdevfreq->lock, irqflags); + + panfrost_devfreq_update_utilization(pfdevfreq); + + WARN_ON(--pfdevfreq->busy_count < 0); + + spin_unlock_irqrestore(&pfdevfreq->lock, irqflags); +} diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.h b/drivers/gpu/drm/panfrost/panfrost_devfreq.h new file mode 100644 index 000000000..db6ea48e2 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Collabora ltd. */ + +#ifndef __PANFROST_DEVFREQ_H__ +#define __PANFROST_DEVFREQ_H__ + +#include <linux/spinlock.h> +#include <linux/ktime.h> + +struct devfreq; +struct opp_table; +struct thermal_cooling_device; + +struct panfrost_device; + +struct panfrost_devfreq { + struct devfreq *devfreq; + struct opp_table *regulators_opp_table; + struct thermal_cooling_device *cooling; + bool opp_of_table_added; + + ktime_t busy_time; + ktime_t idle_time; + ktime_t time_last_update; + int busy_count; + /* + * Protect busy_time, idle_time, time_last_update and busy_count + * because these can be updated concurrently between multiple jobs. + */ + spinlock_t lock; +}; + +int panfrost_devfreq_init(struct panfrost_device *pfdev); +void panfrost_devfreq_fini(struct panfrost_device *pfdev); + +void panfrost_devfreq_resume(struct panfrost_device *pfdev); +void panfrost_devfreq_suspend(struct panfrost_device *pfdev); + +void panfrost_devfreq_record_busy(struct panfrost_devfreq *devfreq); +void panfrost_devfreq_record_idle(struct panfrost_devfreq *devfreq); + +#endif /* __PANFROST_DEVFREQ_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c new file mode 100644 index 000000000..c256929e8 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ + +#include <linux/clk.h> +#include <linux/reset.h> +#include <linux/platform_device.h> +#include <linux/pm_domain.h> +#include <linux/regulator/consumer.h> + +#include "panfrost_device.h" +#include "panfrost_devfreq.h" +#include "panfrost_features.h" +#include "panfrost_gpu.h" +#include "panfrost_job.h" +#include "panfrost_mmu.h" +#include "panfrost_perfcnt.h" + +static int panfrost_reset_init(struct panfrost_device *pfdev) +{ + int err; + + pfdev->rstc = devm_reset_control_array_get(pfdev->dev, false, true); + if (IS_ERR(pfdev->rstc)) { + dev_err(pfdev->dev, "get reset failed %ld\n", PTR_ERR(pfdev->rstc)); + return PTR_ERR(pfdev->rstc); + } + + err = reset_control_deassert(pfdev->rstc); + if (err) + return err; + + return 0; +} + +static void panfrost_reset_fini(struct panfrost_device *pfdev) +{ + reset_control_assert(pfdev->rstc); +} + +static int panfrost_clk_init(struct panfrost_device *pfdev) +{ + int err; + unsigned long rate; + + pfdev->clock = devm_clk_get(pfdev->dev, NULL); + if (IS_ERR(pfdev->clock)) { + dev_err(pfdev->dev, "get clock failed %ld\n", PTR_ERR(pfdev->clock)); + return PTR_ERR(pfdev->clock); + } + + rate = clk_get_rate(pfdev->clock); + dev_info(pfdev->dev, "clock rate = %lu\n", rate); + + err = clk_prepare_enable(pfdev->clock); + if (err) + return err; + + pfdev->bus_clock = devm_clk_get_optional(pfdev->dev, "bus"); + if (IS_ERR(pfdev->bus_clock)) { + dev_err(pfdev->dev, "get bus_clock failed %ld\n", + PTR_ERR(pfdev->bus_clock)); + err = PTR_ERR(pfdev->bus_clock); + goto disable_clock; + } + + if (pfdev->bus_clock) { + rate = clk_get_rate(pfdev->bus_clock); + dev_info(pfdev->dev, "bus_clock rate = %lu\n", rate); + + err = clk_prepare_enable(pfdev->bus_clock); + if (err) + goto disable_clock; + } + + return 0; + +disable_clock: + clk_disable_unprepare(pfdev->clock); + + return err; +} + +static void panfrost_clk_fini(struct panfrost_device *pfdev) +{ + clk_disable_unprepare(pfdev->bus_clock); + clk_disable_unprepare(pfdev->clock); +} + +static int panfrost_regulator_init(struct panfrost_device *pfdev) +{ + int ret, i; + + pfdev->regulators = devm_kcalloc(pfdev->dev, pfdev->comp->num_supplies, + sizeof(*pfdev->regulators), + GFP_KERNEL); + if (!pfdev->regulators) + return -ENOMEM; + + for (i = 0; i < pfdev->comp->num_supplies; i++) + pfdev->regulators[i].supply = pfdev->comp->supply_names[i]; + + ret = devm_regulator_bulk_get(pfdev->dev, + pfdev->comp->num_supplies, + pfdev->regulators); + if (ret < 0) { + if (ret != -EPROBE_DEFER) + dev_err(pfdev->dev, "failed to get regulators: %d\n", + ret); + return ret; + } + + ret = regulator_bulk_enable(pfdev->comp->num_supplies, + pfdev->regulators); + if (ret < 0) { + dev_err(pfdev->dev, "failed to enable regulators: %d\n", ret); + return ret; + } + + return 0; +} + +static void panfrost_regulator_fini(struct panfrost_device *pfdev) +{ + if (!pfdev->regulators) + return; + + regulator_bulk_disable(pfdev->comp->num_supplies, pfdev->regulators); +} + +static void panfrost_pm_domain_fini(struct panfrost_device *pfdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pfdev->pm_domain_devs); i++) { + if (!pfdev->pm_domain_devs[i]) + break; + + if (pfdev->pm_domain_links[i]) + device_link_del(pfdev->pm_domain_links[i]); + + dev_pm_domain_detach(pfdev->pm_domain_devs[i], true); + } +} + +static int panfrost_pm_domain_init(struct panfrost_device *pfdev) +{ + int err; + int i, num_domains; + + num_domains = of_count_phandle_with_args(pfdev->dev->of_node, + "power-domains", + "#power-domain-cells"); + + /* + * Single domain is handled by the core, and, if only a single power + * the power domain is requested, the property is optional. + */ + if (num_domains < 2 && pfdev->comp->num_pm_domains < 2) + return 0; + + if (num_domains != pfdev->comp->num_pm_domains) { + dev_err(pfdev->dev, + "Incorrect number of power domains: %d provided, %d needed\n", + num_domains, pfdev->comp->num_pm_domains); + return -EINVAL; + } + + if (WARN(num_domains > ARRAY_SIZE(pfdev->pm_domain_devs), + "Too many supplies in compatible structure.\n")) + return -EINVAL; + + for (i = 0; i < num_domains; i++) { + pfdev->pm_domain_devs[i] = + dev_pm_domain_attach_by_name(pfdev->dev, + pfdev->comp->pm_domain_names[i]); + if (IS_ERR_OR_NULL(pfdev->pm_domain_devs[i])) { + err = PTR_ERR(pfdev->pm_domain_devs[i]) ? : -ENODATA; + pfdev->pm_domain_devs[i] = NULL; + dev_err(pfdev->dev, + "failed to get pm-domain %s(%d): %d\n", + pfdev->comp->pm_domain_names[i], i, err); + goto err; + } + + pfdev->pm_domain_links[i] = device_link_add(pfdev->dev, + pfdev->pm_domain_devs[i], DL_FLAG_PM_RUNTIME | + DL_FLAG_STATELESS | DL_FLAG_RPM_ACTIVE); + if (!pfdev->pm_domain_links[i]) { + dev_err(pfdev->pm_domain_devs[i], + "adding device link failed!\n"); + err = -ENODEV; + goto err; + } + } + + return 0; + +err: + panfrost_pm_domain_fini(pfdev); + return err; +} + +int panfrost_device_init(struct panfrost_device *pfdev) +{ + int err; + struct resource *res; + + mutex_init(&pfdev->sched_lock); + INIT_LIST_HEAD(&pfdev->scheduled_jobs); + INIT_LIST_HEAD(&pfdev->as_lru_list); + + spin_lock_init(&pfdev->as_lock); + + err = panfrost_clk_init(pfdev); + if (err) { + dev_err(pfdev->dev, "clk init failed %d\n", err); + return err; + } + + err = panfrost_devfreq_init(pfdev); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(pfdev->dev, "devfreq init failed %d\n", err); + goto out_clk; + } + + /* OPP will handle regulators */ + if (!pfdev->pfdevfreq.opp_of_table_added) { + err = panfrost_regulator_init(pfdev); + if (err) + goto out_devfreq; + } + + err = panfrost_reset_init(pfdev); + if (err) { + dev_err(pfdev->dev, "reset init failed %d\n", err); + goto out_regulator; + } + + err = panfrost_pm_domain_init(pfdev); + if (err) + goto out_reset; + + res = platform_get_resource(pfdev->pdev, IORESOURCE_MEM, 0); + pfdev->iomem = devm_ioremap_resource(pfdev->dev, res); + if (IS_ERR(pfdev->iomem)) { + dev_err(pfdev->dev, "failed to ioremap iomem\n"); + err = PTR_ERR(pfdev->iomem); + goto out_pm_domain; + } + + err = panfrost_gpu_init(pfdev); + if (err) + goto out_pm_domain; + + err = panfrost_mmu_init(pfdev); + if (err) + goto out_gpu; + + err = panfrost_job_init(pfdev); + if (err) + goto out_mmu; + + err = panfrost_perfcnt_init(pfdev); + if (err) + goto out_job; + + return 0; +out_job: + panfrost_job_fini(pfdev); +out_mmu: + panfrost_mmu_fini(pfdev); +out_gpu: + panfrost_gpu_fini(pfdev); +out_pm_domain: + panfrost_pm_domain_fini(pfdev); +out_reset: + panfrost_reset_fini(pfdev); +out_regulator: + panfrost_regulator_fini(pfdev); +out_devfreq: + panfrost_devfreq_fini(pfdev); +out_clk: + panfrost_clk_fini(pfdev); + return err; +} + +void panfrost_device_fini(struct panfrost_device *pfdev) +{ + panfrost_perfcnt_fini(pfdev); + panfrost_job_fini(pfdev); + panfrost_mmu_fini(pfdev); + panfrost_gpu_fini(pfdev); + panfrost_pm_domain_fini(pfdev); + panfrost_reset_fini(pfdev); + panfrost_devfreq_fini(pfdev); + panfrost_regulator_fini(pfdev); + panfrost_clk_fini(pfdev); +} + +const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code) +{ + switch (exception_code) { + /* Non-Fault Status code */ + case 0x00: return "NOT_STARTED/IDLE/OK"; + case 0x01: return "DONE"; + case 0x02: return "INTERRUPTED"; + case 0x03: return "STOPPED"; + case 0x04: return "TERMINATED"; + case 0x08: return "ACTIVE"; + /* Job exceptions */ + case 0x40: return "JOB_CONFIG_FAULT"; + case 0x41: return "JOB_POWER_FAULT"; + case 0x42: return "JOB_READ_FAULT"; + case 0x43: return "JOB_WRITE_FAULT"; + case 0x44: return "JOB_AFFINITY_FAULT"; + case 0x48: return "JOB_BUS_FAULT"; + case 0x50: return "INSTR_INVALID_PC"; + case 0x51: return "INSTR_INVALID_ENC"; + case 0x52: return "INSTR_TYPE_MISMATCH"; + case 0x53: return "INSTR_OPERAND_FAULT"; + case 0x54: return "INSTR_TLS_FAULT"; + case 0x55: return "INSTR_BARRIER_FAULT"; + case 0x56: return "INSTR_ALIGN_FAULT"; + case 0x58: return "DATA_INVALID_FAULT"; + case 0x59: return "TILE_RANGE_FAULT"; + case 0x5A: return "ADDR_RANGE_FAULT"; + case 0x60: return "OUT_OF_MEMORY"; + /* GPU exceptions */ + case 0x80: return "DELAYED_BUS_FAULT"; + case 0x88: return "SHAREABILITY_FAULT"; + /* MMU exceptions */ + case 0xC1: return "TRANSLATION_FAULT_LEVEL1"; + case 0xC2: return "TRANSLATION_FAULT_LEVEL2"; + case 0xC3: return "TRANSLATION_FAULT_LEVEL3"; + case 0xC4: return "TRANSLATION_FAULT_LEVEL4"; + case 0xC8: return "PERMISSION_FAULT"; + case 0xC9 ... 0xCF: return "PERMISSION_FAULT"; + case 0xD1: return "TRANSTAB_BUS_FAULT_LEVEL1"; + case 0xD2: return "TRANSTAB_BUS_FAULT_LEVEL2"; + case 0xD3: return "TRANSTAB_BUS_FAULT_LEVEL3"; + case 0xD4: return "TRANSTAB_BUS_FAULT_LEVEL4"; + case 0xD8: return "ACCESS_FLAG"; + case 0xD9 ... 0xDF: return "ACCESS_FLAG"; + case 0xE0 ... 0xE7: return "ADDRESS_SIZE_FAULT"; + case 0xE8 ... 0xEF: return "MEMORY_ATTRIBUTES_FAULT"; + } + + return "UNKNOWN"; +} + +void panfrost_device_reset(struct panfrost_device *pfdev) +{ + panfrost_gpu_soft_reset(pfdev); + + panfrost_gpu_power_on(pfdev); + panfrost_mmu_reset(pfdev); + panfrost_job_enable_interrupts(pfdev); +} + +#ifdef CONFIG_PM +int panfrost_device_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct panfrost_device *pfdev = platform_get_drvdata(pdev); + + panfrost_device_reset(pfdev); + panfrost_devfreq_resume(pfdev); + + return 0; +} + +int panfrost_device_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct panfrost_device *pfdev = platform_get_drvdata(pdev); + + if (!panfrost_job_is_idle(pfdev)) + return -EBUSY; + + panfrost_devfreq_suspend(pfdev); + panfrost_gpu_power_off(pfdev); + + return 0; +} +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h new file mode 100644 index 000000000..4c6bdea55 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ + +#ifndef __PANFROST_DEVICE_H__ +#define __PANFROST_DEVICE_H__ + +#include <linux/atomic.h> +#include <linux/io-pgtable.h> +#include <linux/regulator/consumer.h> +#include <linux/spinlock.h> +#include <drm/drm_device.h> +#include <drm/drm_mm.h> +#include <drm/gpu_scheduler.h> + +#include "panfrost_devfreq.h" + +struct panfrost_device; +struct panfrost_mmu; +struct panfrost_job_slot; +struct panfrost_job; +struct panfrost_perfcnt; + +#define NUM_JOB_SLOTS 3 +#define MAX_PM_DOMAINS 3 + +struct panfrost_features { + u16 id; + u16 revision; + + u64 shader_present; + u64 tiler_present; + u64 l2_present; + u64 stack_present; + u32 as_present; + u32 js_present; + + u32 l2_features; + u32 core_features; + u32 tiler_features; + u32 mem_features; + u32 mmu_features; + u32 thread_features; + u32 max_threads; + u32 thread_max_workgroup_sz; + u32 thread_max_barrier_sz; + u32 coherency_features; + u32 texture_features[4]; + u32 js_features[16]; + + u32 nr_core_groups; + u32 thread_tls_alloc; + + unsigned long hw_features[64 / BITS_PER_LONG]; + unsigned long hw_issues[64 / BITS_PER_LONG]; +}; + +/* + * Features that cannot be automatically detected and need matching using the + * compatible string, typically SoC-specific. + */ +struct panfrost_compatible { + /* Supplies count and names. */ + int num_supplies; + const char * const *supply_names; + /* + * Number of power domains required, note that values 0 and 1 are + * handled identically, as only values > 1 need special handling. + */ + int num_pm_domains; + /* Only required if num_pm_domains > 1. */ + const char * const *pm_domain_names; + + /* Vendor implementation quirks callback */ + void (*vendor_quirk)(struct panfrost_device *pfdev); +}; + +struct panfrost_device { + struct device *dev; + struct drm_device *ddev; + struct platform_device *pdev; + + void __iomem *iomem; + struct clk *clock; + struct clk *bus_clock; + struct regulator_bulk_data *regulators; + struct reset_control *rstc; + /* pm_domains for devices with more than one. */ + struct device *pm_domain_devs[MAX_PM_DOMAINS]; + struct device_link *pm_domain_links[MAX_PM_DOMAINS]; + bool coherent; + + struct panfrost_features features; + const struct panfrost_compatible *comp; + + spinlock_t as_lock; + unsigned long as_in_use_mask; + unsigned long as_alloc_mask; + struct list_head as_lru_list; + + struct panfrost_job_slot *js; + + struct panfrost_job *jobs[NUM_JOB_SLOTS]; + struct list_head scheduled_jobs; + + struct panfrost_perfcnt *perfcnt; + + struct mutex sched_lock; + + struct { + struct work_struct work; + atomic_t pending; + } reset; + + struct mutex shrinker_lock; + struct list_head shrinker_list; + struct shrinker shrinker; + + struct panfrost_devfreq pfdevfreq; +}; + +struct panfrost_mmu { + struct panfrost_device *pfdev; + struct kref refcount; + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable_ops *pgtbl_ops; + struct drm_mm mm; + spinlock_t mm_lock; + int as; + atomic_t as_count; + struct list_head list; +}; + +struct panfrost_file_priv { + struct panfrost_device *pfdev; + + struct drm_sched_entity sched_entity[NUM_JOB_SLOTS]; + + struct panfrost_mmu *mmu; +}; + +static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev) +{ + return ddev->dev_private; +} + +static inline int panfrost_model_cmp(struct panfrost_device *pfdev, s32 id) +{ + s32 match_id = pfdev->features.id; + + if (match_id & 0xf000) + match_id &= 0xf00f; + return match_id - id; +} + +static inline bool panfrost_model_is_bifrost(struct panfrost_device *pfdev) +{ + return panfrost_model_cmp(pfdev, 0x1000) >= 0; +} + +static inline bool panfrost_model_eq(struct panfrost_device *pfdev, s32 id) +{ + return !panfrost_model_cmp(pfdev, id); +} + +int panfrost_unstable_ioctl_check(void); + +int panfrost_device_init(struct panfrost_device *pfdev); +void panfrost_device_fini(struct panfrost_device *pfdev); +void panfrost_device_reset(struct panfrost_device *pfdev); + +int panfrost_device_resume(struct device *dev); +int panfrost_device_suspend(struct device *dev); + +const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c new file mode 100644 index 000000000..4af25c0b6 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */ +/* Copyright 2019 Collabora ltd. */ + +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/pagemap.h> +#include <linux/pm_runtime.h> +#include <drm/panfrost_drm.h> +#include <drm/drm_drv.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_syncobj.h> +#include <drm/drm_utils.h> + +#include "panfrost_device.h" +#include "panfrost_gem.h" +#include "panfrost_mmu.h" +#include "panfrost_job.h" +#include "panfrost_gpu.h" +#include "panfrost_perfcnt.h" + +static bool unstable_ioctls; +module_param_unsafe(unstable_ioctls, bool, 0600); + +static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct drm_file *file) +{ + struct drm_panfrost_get_param *param = data; + struct panfrost_device *pfdev = ddev->dev_private; + + if (param->pad != 0) + return -EINVAL; + +#define PANFROST_FEATURE(name, member) \ + case DRM_PANFROST_PARAM_ ## name: \ + param->value = pfdev->features.member; \ + break +#define PANFROST_FEATURE_ARRAY(name, member, max) \ + case DRM_PANFROST_PARAM_ ## name ## 0 ... \ + DRM_PANFROST_PARAM_ ## name ## max: \ + param->value = pfdev->features.member[param->param - \ + DRM_PANFROST_PARAM_ ## name ## 0]; \ + break + + switch (param->param) { + PANFROST_FEATURE(GPU_PROD_ID, id); + PANFROST_FEATURE(GPU_REVISION, revision); + PANFROST_FEATURE(SHADER_PRESENT, shader_present); + PANFROST_FEATURE(TILER_PRESENT, tiler_present); + PANFROST_FEATURE(L2_PRESENT, l2_present); + PANFROST_FEATURE(STACK_PRESENT, stack_present); + PANFROST_FEATURE(AS_PRESENT, as_present); + PANFROST_FEATURE(JS_PRESENT, js_present); + PANFROST_FEATURE(L2_FEATURES, l2_features); + PANFROST_FEATURE(CORE_FEATURES, core_features); + PANFROST_FEATURE(TILER_FEATURES, tiler_features); + PANFROST_FEATURE(MEM_FEATURES, mem_features); + PANFROST_FEATURE(MMU_FEATURES, mmu_features); + PANFROST_FEATURE(THREAD_FEATURES, thread_features); + PANFROST_FEATURE(MAX_THREADS, max_threads); + PANFROST_FEATURE(THREAD_MAX_WORKGROUP_SZ, + thread_max_workgroup_sz); + PANFROST_FEATURE(THREAD_MAX_BARRIER_SZ, + thread_max_barrier_sz); + PANFROST_FEATURE(COHERENCY_FEATURES, coherency_features); + PANFROST_FEATURE_ARRAY(TEXTURE_FEATURES, texture_features, 3); + PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15); + PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups); + PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc); + default: + return -EINVAL; + } + + return 0; +} + +static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct panfrost_file_priv *priv = file->driver_priv; + struct panfrost_gem_object *bo; + struct drm_panfrost_create_bo *args = data; + struct panfrost_gem_mapping *mapping; + int ret; + + if (!args->size || args->pad || + (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) + return -EINVAL; + + /* Heaps should never be executable */ + if ((args->flags & PANFROST_BO_HEAP) && + !(args->flags & PANFROST_BO_NOEXEC)) + return -EINVAL; + + bo = panfrost_gem_create(dev, args->size, args->flags); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (ret) + goto out; + + mapping = panfrost_gem_mapping_get(bo, priv); + if (mapping) { + args->offset = mapping->mmnode.start << PAGE_SHIFT; + panfrost_gem_mapping_put(mapping); + } else { + /* This can only happen if the handle from + * drm_gem_handle_create() has already been guessed and freed + * by user space + */ + ret = -EINVAL; + } + +out: + drm_gem_object_put(&bo->base.base); + return ret; +} + +/** + * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd + * @args: IOCTL args + * @job: job being set up + * + * Resolve handles from userspace to BOs and attach them to job. + * + * Note that this function doesn't need to unreference the BOs on + * failure, because that will happen at panfrost_job_cleanup() time. + */ +static int +panfrost_lookup_bos(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_panfrost_submit *args, + struct panfrost_job *job) +{ + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct panfrost_gem_object *bo; + unsigned int i; + int ret; + + job->bo_count = args->bo_handle_count; + + if (!job->bo_count) + return 0; + + job->implicit_fences = kvmalloc_array(job->bo_count, + sizeof(struct dma_fence *), + GFP_KERNEL | __GFP_ZERO); + if (!job->implicit_fences) + return -ENOMEM; + + ret = drm_gem_objects_lookup(file_priv, + (void __user *)(uintptr_t)args->bo_handles, + job->bo_count, &job->bos); + if (ret) + return ret; + + job->mappings = kvmalloc_array(job->bo_count, + sizeof(struct panfrost_gem_mapping *), + GFP_KERNEL | __GFP_ZERO); + if (!job->mappings) + return -ENOMEM; + + for (i = 0; i < job->bo_count; i++) { + struct panfrost_gem_mapping *mapping; + + bo = to_panfrost_bo(job->bos[i]); + mapping = panfrost_gem_mapping_get(bo, priv); + if (!mapping) { + ret = -EINVAL; + break; + } + + atomic_inc(&bo->gpu_usecount); + job->mappings[i] = mapping; + } + + return ret; +} + +/** + * panfrost_copy_in_sync() - Sets up job->in_fences[] with the sync objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd + * @args: IOCTL args + * @job: job being set up + * + * Resolve syncobjs from userspace to fences and attach them to job. + * + * Note that this function doesn't need to unreference the fences on + * failure, because that will happen at panfrost_job_cleanup() time. + */ +static int +panfrost_copy_in_sync(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_panfrost_submit *args, + struct panfrost_job *job) +{ + u32 *handles; + int ret = 0; + int i; + + job->in_fence_count = args->in_sync_count; + + if (!job->in_fence_count) + return 0; + + job->in_fences = kvmalloc_array(job->in_fence_count, + sizeof(struct dma_fence *), + GFP_KERNEL | __GFP_ZERO); + if (!job->in_fences) { + DRM_DEBUG("Failed to allocate job in fences\n"); + return -ENOMEM; + } + + handles = kvmalloc_array(job->in_fence_count, sizeof(u32), GFP_KERNEL); + if (!handles) { + ret = -ENOMEM; + DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); + goto fail; + } + + if (copy_from_user(handles, + (void __user *)(uintptr_t)args->in_syncs, + job->in_fence_count * sizeof(u32))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy in syncobj handles\n"); + goto fail; + } + + for (i = 0; i < job->in_fence_count; i++) { + ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, + &job->in_fences[i]); + if (ret == -EINVAL) + goto fail; + } + +fail: + kvfree(handles); + return ret; +} + +static int panfrost_ioctl_submit(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct panfrost_device *pfdev = dev->dev_private; + struct drm_panfrost_submit *args = data; + struct drm_syncobj *sync_out = NULL; + struct panfrost_job *job; + int ret = 0; + + if (!args->jc) + return -EINVAL; + + if (args->requirements && args->requirements != PANFROST_JD_REQ_FS) + return -EINVAL; + + if (args->out_sync > 0) { + sync_out = drm_syncobj_find(file, args->out_sync); + if (!sync_out) + return -ENODEV; + } + + job = kzalloc(sizeof(*job), GFP_KERNEL); + if (!job) { + ret = -ENOMEM; + goto fail_out_sync; + } + + kref_init(&job->refcount); + + job->pfdev = pfdev; + job->jc = args->jc; + job->requirements = args->requirements; + job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); + job->file_priv = file->driver_priv; + + ret = panfrost_copy_in_sync(dev, file, args, job); + if (ret) + goto fail_job; + + ret = panfrost_lookup_bos(dev, file, args, job); + if (ret) + goto fail_job; + + ret = panfrost_job_push(job); + if (ret) + goto fail_job; + + /* Update the return sync object for the job */ + if (sync_out) + drm_syncobj_replace_fence(sync_out, job->render_done_fence); + +fail_job: + panfrost_job_put(job); +fail_out_sync: + if (sync_out) + drm_syncobj_put(sync_out); + + return ret; +} + +static int +panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + long ret; + struct drm_panfrost_wait_bo *args = data; + struct drm_gem_object *gem_obj; + unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + + if (args->pad) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) + return -ENOENT; + + ret = dma_resv_wait_timeout_rcu(gem_obj->resv, true, + true, timeout); + if (!ret) + ret = timeout ? -ETIMEDOUT : -EBUSY; + + drm_gem_object_put(gem_obj); + + return ret; +} + +static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_panfrost_mmap_bo *args = data; + struct drm_gem_object *gem_obj; + int ret; + + if (args->flags != 0) { + DRM_INFO("unknown mmap_bo flags: %d\n", args->flags); + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + /* Don't allow mmapping of heap objects as pages are not pinned. */ + if (to_panfrost_bo(gem_obj)->is_heap) { + ret = -EINVAL; + goto out; + } + + ret = drm_gem_create_mmap_offset(gem_obj); + if (ret == 0) + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + +out: + drm_gem_object_put(gem_obj); + return ret; +} + +static int panfrost_ioctl_get_bo_offset(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct drm_panfrost_get_bo_offset *args = data; + struct panfrost_gem_mapping *mapping; + struct drm_gem_object *gem_obj; + struct panfrost_gem_object *bo; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_panfrost_bo(gem_obj); + + mapping = panfrost_gem_mapping_get(bo, priv); + drm_gem_object_put(gem_obj); + + if (!mapping) + return -EINVAL; + + args->offset = mapping->mmnode.start << PAGE_SHIFT; + panfrost_gem_mapping_put(mapping); + return 0; +} + +static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct drm_panfrost_madvise *args = data; + struct panfrost_device *pfdev = dev->dev_private; + struct drm_gem_object *gem_obj; + struct panfrost_gem_object *bo; + int ret = 0; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + bo = to_panfrost_bo(gem_obj); + + mutex_lock(&pfdev->shrinker_lock); + mutex_lock(&bo->mappings.lock); + if (args->madv == PANFROST_MADV_DONTNEED) { + struct panfrost_gem_mapping *first; + + first = list_first_entry(&bo->mappings.list, + struct panfrost_gem_mapping, + node); + + /* + * If we want to mark the BO purgeable, there must be only one + * user: the caller FD. + * We could do something smarter and mark the BO purgeable only + * when all its users have marked it purgeable, but globally + * visible/shared BOs are likely to never be marked purgeable + * anyway, so let's not bother. + */ + if (!list_is_singular(&bo->mappings.list) || + WARN_ON_ONCE(first->mmu != priv->mmu)) { + ret = -EINVAL; + goto out_unlock_mappings; + } + } + + args->retained = drm_gem_shmem_madvise(gem_obj, args->madv); + + if (args->retained) { + if (args->madv == PANFROST_MADV_DONTNEED) + list_move_tail(&bo->base.madv_list, + &pfdev->shrinker_list); + else if (args->madv == PANFROST_MADV_WILLNEED) + list_del_init(&bo->base.madv_list); + } + +out_unlock_mappings: + mutex_unlock(&bo->mappings.lock); + mutex_unlock(&pfdev->shrinker_lock); + + drm_gem_object_put(gem_obj); + return ret; +} + +int panfrost_unstable_ioctl_check(void) +{ + if (!unstable_ioctls) + return -ENOSYS; + + return 0; +} + +static int +panfrost_open(struct drm_device *dev, struct drm_file *file) +{ + int ret; + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_file_priv *panfrost_priv; + + panfrost_priv = kzalloc(sizeof(*panfrost_priv), GFP_KERNEL); + if (!panfrost_priv) + return -ENOMEM; + + panfrost_priv->pfdev = pfdev; + file->driver_priv = panfrost_priv; + + panfrost_priv->mmu = panfrost_mmu_ctx_create(pfdev); + if (IS_ERR(panfrost_priv->mmu)) { + ret = PTR_ERR(panfrost_priv->mmu); + goto err_free; + } + + ret = panfrost_job_open(panfrost_priv); + if (ret) + goto err_job; + + return 0; + +err_job: + panfrost_mmu_ctx_put(panfrost_priv->mmu); +err_free: + kfree(panfrost_priv); + return ret; +} + +static void +panfrost_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct panfrost_file_priv *panfrost_priv = file->driver_priv; + + panfrost_perfcnt_close(file); + panfrost_job_close(panfrost_priv); + + panfrost_mmu_ctx_put(panfrost_priv->mmu); + kfree(panfrost_priv); +} + +static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { +#define PANFROST_IOCTL(n, func, flags) \ + DRM_IOCTL_DEF_DRV(PANFROST_##n, panfrost_ioctl_##func, flags) + + PANFROST_IOCTL(SUBMIT, submit, DRM_RENDER_ALLOW), + PANFROST_IOCTL(WAIT_BO, wait_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(CREATE_BO, create_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(MMAP_BO, mmap_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(GET_PARAM, get_param, DRM_RENDER_ALLOW), + PANFROST_IOCTL(GET_BO_OFFSET, get_bo_offset, DRM_RENDER_ALLOW), + PANFROST_IOCTL(PERFCNT_ENABLE, perfcnt_enable, DRM_RENDER_ALLOW), + PANFROST_IOCTL(PERFCNT_DUMP, perfcnt_dump, DRM_RENDER_ALLOW), + PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW), +}; + +DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); + +/* + * Panfrost driver version: + * - 1.0 - initial interface + * - 1.1 - adds HEAP and NOEXEC flags for CREATE_BO + */ +static struct drm_driver panfrost_drm_driver = { + .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ, + .open = panfrost_open, + .postclose = panfrost_postclose, + .ioctls = panfrost_drm_driver_ioctls, + .num_ioctls = ARRAY_SIZE(panfrost_drm_driver_ioctls), + .fops = &panfrost_drm_driver_fops, + .name = "panfrost", + .desc = "panfrost DRM", + .date = "20180908", + .major = 1, + .minor = 1, + + .gem_create_object = panfrost_gem_create_object, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table, + .gem_prime_mmap = drm_gem_prime_mmap, +}; + +static int panfrost_probe(struct platform_device *pdev) +{ + struct panfrost_device *pfdev; + struct drm_device *ddev; + int err; + + pfdev = devm_kzalloc(&pdev->dev, sizeof(*pfdev), GFP_KERNEL); + if (!pfdev) + return -ENOMEM; + + pfdev->pdev = pdev; + pfdev->dev = &pdev->dev; + + platform_set_drvdata(pdev, pfdev); + + pfdev->comp = of_device_get_match_data(&pdev->dev); + if (!pfdev->comp) + return -ENODEV; + + pfdev->coherent = device_get_dma_attr(&pdev->dev) == DEV_DMA_COHERENT; + + /* Allocate and initialze the DRM device. */ + ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev); + if (IS_ERR(ddev)) + return PTR_ERR(ddev); + + ddev->dev_private = pfdev; + pfdev->ddev = ddev; + + mutex_init(&pfdev->shrinker_lock); + INIT_LIST_HEAD(&pfdev->shrinker_list); + + err = panfrost_device_init(pfdev); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(&pdev->dev, "Fatal error during GPU init\n"); + goto err_out0; + } + + pm_runtime_set_active(pfdev->dev); + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_enable(pfdev->dev); + pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */ + pm_runtime_use_autosuspend(pfdev->dev); + + /* + * Register the DRM device with the core and the connectors with + * sysfs + */ + err = drm_dev_register(ddev, 0); + if (err < 0) + goto err_out1; + + panfrost_gem_shrinker_init(ddev); + + return 0; + +err_out1: + pm_runtime_disable(pfdev->dev); + panfrost_device_fini(pfdev); + pm_runtime_set_suspended(pfdev->dev); +err_out0: + drm_dev_put(ddev); + return err; +} + +static int panfrost_remove(struct platform_device *pdev) +{ + struct panfrost_device *pfdev = platform_get_drvdata(pdev); + struct drm_device *ddev = pfdev->ddev; + + drm_dev_unregister(ddev); + panfrost_gem_shrinker_cleanup(ddev); + + pm_runtime_get_sync(pfdev->dev); + pm_runtime_disable(pfdev->dev); + panfrost_device_fini(pfdev); + pm_runtime_set_suspended(pfdev->dev); + + drm_dev_put(ddev); + return 0; +} + +static const char * const default_supplies[] = { "mali" }; +static const struct panfrost_compatible default_data = { + .num_supplies = ARRAY_SIZE(default_supplies), + .supply_names = default_supplies, + .num_pm_domains = 1, /* optional */ + .pm_domain_names = NULL, +}; + +static const struct panfrost_compatible amlogic_data = { + .num_supplies = ARRAY_SIZE(default_supplies), + .supply_names = default_supplies, + .vendor_quirk = panfrost_gpu_amlogic_quirk, +}; + +static const struct of_device_id dt_match[] = { + /* Set first to probe before the generic compatibles */ + { .compatible = "amlogic,meson-gxm-mali", + .data = &amlogic_data, }, + { .compatible = "amlogic,meson-g12a-mali", + .data = &amlogic_data, }, + { .compatible = "arm,mali-t604", .data = &default_data, }, + { .compatible = "arm,mali-t624", .data = &default_data, }, + { .compatible = "arm,mali-t628", .data = &default_data, }, + { .compatible = "arm,mali-t720", .data = &default_data, }, + { .compatible = "arm,mali-t760", .data = &default_data, }, + { .compatible = "arm,mali-t820", .data = &default_data, }, + { .compatible = "arm,mali-t830", .data = &default_data, }, + { .compatible = "arm,mali-t860", .data = &default_data, }, + { .compatible = "arm,mali-t880", .data = &default_data, }, + { .compatible = "arm,mali-bifrost", .data = &default_data, }, + {} +}; +MODULE_DEVICE_TABLE(of, dt_match); + +static const struct dev_pm_ops panfrost_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(panfrost_device_suspend, panfrost_device_resume, NULL) +}; + +static struct platform_driver panfrost_driver = { + .probe = panfrost_probe, + .remove = panfrost_remove, + .driver = { + .name = "panfrost", + .pm = &panfrost_pm_ops, + .of_match_table = dt_match, + }, +}; +module_platform_driver(panfrost_driver); + +MODULE_AUTHOR("Panfrost Project Developers"); +MODULE_DESCRIPTION("Panfrost DRM Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h b/drivers/gpu/drm/panfrost/panfrost_features.h new file mode 100644 index 000000000..5056777c7 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_features.h @@ -0,0 +1,309 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. */ +/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */ +#ifndef __PANFROST_FEATURES_H__ +#define __PANFROST_FEATURES_H__ + +#include <linux/bitops.h> + +#include "panfrost_device.h" + +enum panfrost_hw_feature { + HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + HW_FEATURE_XAFFINITY, + HW_FEATURE_OUT_OF_ORDER_EXEC, + HW_FEATURE_MRT, + HW_FEATURE_BRNDOUT_CC, + HW_FEATURE_INTERPIPE_REG_ALIASING, + HW_FEATURE_LD_ST_TILEBUFFER, + HW_FEATURE_MSAA_16X, + HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + HW_FEATURE_OPTIMIZED_COVERAGE_MASK, + HW_FEATURE_T7XX_PAIRING_RULES, + HW_FEATURE_LD_ST_LEA_TEX, + HW_FEATURE_LINEAR_FILTER_FLOAT, + HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, + HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS, + HW_FEATURE_TEST4_DATUM_MODE, + HW_FEATURE_NEXT_INSTRUCTION_TYPE, + HW_FEATURE_BRNDOUT_KILL, + HW_FEATURE_WARPING, + HW_FEATURE_V4, + HW_FEATURE_FLUSH_REDUCTION, + HW_FEATURE_PROTECTED_MODE, + HW_FEATURE_COHERENCY_REG, + HW_FEATURE_PROTECTED_DEBUG_MODE, + HW_FEATURE_AARCH64_MMU, + HW_FEATURE_TLS_HASHING, + HW_FEATURE_THREAD_GROUP_SPLIT, + HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, +}; + +#define hw_features_t600 (\ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_V4)) + +#define hw_features_t620 (\ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_V4)) + +#define hw_features_t720 (\ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_OPTIMIZED_COVERAGE_MASK) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_V4)) + + +#define hw_features_t760 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) + +// T860 +#define hw_features_t860 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) + +#define hw_features_t880 hw_features_t860 + +#define hw_features_t830 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) + +#define hw_features_t820 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) + +#define hw_features_g71 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG)) + +#define hw_features_g72 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG)) + +#define hw_features_g51 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG)) + +#define hw_features_g52 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG)) + +#define hw_features_g76 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG) | \ + BIT_ULL(HW_FEATURE_AARCH64_MMU) | \ + BIT_ULL(HW_FEATURE_TLS_HASHING) | \ + BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) + +#define hw_features_g31 (\ + BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ + BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ + BIT_ULL(HW_FEATURE_XAFFINITY) | \ + BIT_ULL(HW_FEATURE_WARPING) | \ + BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ + BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ + BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ + BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ + BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ + BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ + BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ + BIT_ULL(HW_FEATURE_MRT) | \ + BIT_ULL(HW_FEATURE_MSAA_16X) | \ + BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ + BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ + BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ + BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ + BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ + BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ + BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ + BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_COHERENCY_REG) | \ + BIT_ULL(HW_FEATURE_AARCH64_MMU) | \ + BIT_ULL(HW_FEATURE_TLS_HASHING) | \ + BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) + +static inline bool panfrost_has_hw_feature(struct panfrost_device *pfdev, + enum panfrost_hw_feature feat) +{ + return test_bit(feat, pfdev->features.hw_features); +} + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c new file mode 100644 index 000000000..c843fbfdb --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ + +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/dma-buf.h> +#include <linux/dma-mapping.h> + +#include <drm/panfrost_drm.h> +#include "panfrost_device.h" +#include "panfrost_gem.h" +#include "panfrost_mmu.h" + +/* Called DRM core on the last userspace/kernel unreference of the + * BO. + */ +static void panfrost_gem_free_object(struct drm_gem_object *obj) +{ + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct panfrost_device *pfdev = obj->dev->dev_private; + + /* + * Make sure the BO is no longer inserted in the shrinker list before + * taking care of the destruction itself. If we don't do that we have a + * race condition between this function and what's done in + * panfrost_gem_shrinker_scan(). + */ + mutex_lock(&pfdev->shrinker_lock); + list_del_init(&bo->base.madv_list); + mutex_unlock(&pfdev->shrinker_lock); + + /* + * If we still have mappings attached to the BO, there's a problem in + * our refcounting. + */ + WARN_ON_ONCE(!list_empty(&bo->mappings.list)); + + if (bo->sgts) { + int i; + int n_sgt = bo->base.base.size / SZ_2M; + + for (i = 0; i < n_sgt; i++) { + if (bo->sgts[i].sgl) { + dma_unmap_sgtable(pfdev->dev, &bo->sgts[i], + DMA_BIDIRECTIONAL, 0); + sg_free_table(&bo->sgts[i]); + } + } + kvfree(bo->sgts); + } + + drm_gem_shmem_free_object(obj); +} + +struct panfrost_gem_mapping * +panfrost_gem_mapping_get(struct panfrost_gem_object *bo, + struct panfrost_file_priv *priv) +{ + struct panfrost_gem_mapping *iter, *mapping = NULL; + + mutex_lock(&bo->mappings.lock); + list_for_each_entry(iter, &bo->mappings.list, node) { + if (iter->mmu == priv->mmu) { + kref_get(&iter->refcount); + mapping = iter; + break; + } + } + mutex_unlock(&bo->mappings.lock); + + return mapping; +} + +static void +panfrost_gem_teardown_mapping(struct panfrost_gem_mapping *mapping) +{ + if (mapping->active) + panfrost_mmu_unmap(mapping); + + spin_lock(&mapping->mmu->mm_lock); + if (drm_mm_node_allocated(&mapping->mmnode)) + drm_mm_remove_node(&mapping->mmnode); + spin_unlock(&mapping->mmu->mm_lock); +} + +static void panfrost_gem_mapping_release(struct kref *kref) +{ + struct panfrost_gem_mapping *mapping; + + mapping = container_of(kref, struct panfrost_gem_mapping, refcount); + + panfrost_gem_teardown_mapping(mapping); + drm_gem_object_put(&mapping->obj->base.base); + panfrost_mmu_ctx_put(mapping->mmu); + kfree(mapping); +} + +void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping) +{ + if (!mapping) + return; + + kref_put(&mapping->refcount, panfrost_gem_mapping_release); +} + +void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo) +{ + struct panfrost_gem_mapping *mapping; + + list_for_each_entry(mapping, &bo->mappings.list, node) + panfrost_gem_teardown_mapping(mapping); +} + +int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv) +{ + int ret; + size_t size = obj->size; + u64 align; + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + unsigned long color = bo->noexec ? PANFROST_BO_NOEXEC : 0; + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct panfrost_gem_mapping *mapping; + + mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) + return -ENOMEM; + + INIT_LIST_HEAD(&mapping->node); + kref_init(&mapping->refcount); + drm_gem_object_get(obj); + mapping->obj = bo; + + /* + * Executable buffers cannot cross a 16MB boundary as the program + * counter is 24-bits. We assume executable buffers will be less than + * 16MB and aligning executable buffers to their size will avoid + * crossing a 16MB boundary. + */ + if (!bo->noexec) + align = size >> PAGE_SHIFT; + else + align = size >= SZ_2M ? SZ_2M >> PAGE_SHIFT : 0; + + mapping->mmu = panfrost_mmu_ctx_get(priv->mmu); + spin_lock(&mapping->mmu->mm_lock); + ret = drm_mm_insert_node_generic(&mapping->mmu->mm, &mapping->mmnode, + size >> PAGE_SHIFT, align, color, 0); + spin_unlock(&mapping->mmu->mm_lock); + if (ret) + goto err; + + if (!bo->is_heap) { + ret = panfrost_mmu_map(mapping); + if (ret) + goto err; + } + + mutex_lock(&bo->mappings.lock); + WARN_ON(bo->base.madv != PANFROST_MADV_WILLNEED); + list_add_tail(&mapping->node, &bo->mappings.list); + mutex_unlock(&bo->mappings.lock); + +err: + if (ret) + panfrost_gem_mapping_put(mapping); + return ret; +} + +void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv) +{ + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct panfrost_gem_mapping *mapping = NULL, *iter; + + mutex_lock(&bo->mappings.lock); + list_for_each_entry(iter, &bo->mappings.list, node) { + if (iter->mmu == priv->mmu) { + mapping = iter; + list_del(&iter->node); + break; + } + } + mutex_unlock(&bo->mappings.lock); + + panfrost_gem_mapping_put(mapping); +} + +static int panfrost_gem_pin(struct drm_gem_object *obj) +{ + if (to_panfrost_bo(obj)->is_heap) + return -EINVAL; + + return drm_gem_shmem_pin(obj); +} + +static const struct drm_gem_object_funcs panfrost_gem_funcs = { + .free = panfrost_gem_free_object, + .open = panfrost_gem_open, + .close = panfrost_gem_close, + .print_info = drm_gem_shmem_print_info, + .pin = panfrost_gem_pin, + .unpin = drm_gem_shmem_unpin, + .get_sg_table = drm_gem_shmem_get_sg_table, + .vmap = drm_gem_shmem_vmap, + .vunmap = drm_gem_shmem_vunmap, + .mmap = drm_gem_shmem_mmap, +}; + +/** + * panfrost_gem_create_object - Implementation of driver->gem_create_object. + * @dev: DRM device + * @size: Size in bytes of the memory the object will reference + * + * This lets the GEM helpers allocate object structs for us, and keep + * our BO stats correct. + */ +struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size) +{ + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_gem_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + INIT_LIST_HEAD(&obj->mappings.list); + mutex_init(&obj->mappings.lock); + obj->base.base.funcs = &panfrost_gem_funcs; + obj->base.map_cached = pfdev->coherent; + + return &obj->base.base; +} + +struct panfrost_gem_object * +panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) +{ + struct drm_gem_shmem_object *shmem; + struct panfrost_gem_object *bo; + + /* Round up heap allocations to 2MB to keep fault handling simple */ + if (flags & PANFROST_BO_HEAP) + size = roundup(size, SZ_2M); + + shmem = drm_gem_shmem_create(dev, size); + if (IS_ERR(shmem)) + return ERR_CAST(shmem); + + bo = to_panfrost_bo(&shmem->base); + bo->noexec = !!(flags & PANFROST_BO_NOEXEC); + bo->is_heap = !!(flags & PANFROST_BO_HEAP); + + return bo; +} + +struct drm_gem_object * +panfrost_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct drm_gem_object *obj; + struct panfrost_gem_object *bo; + + obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + bo = to_panfrost_bo(obj); + bo->noexec = true; + + return obj; +} diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h new file mode 100644 index 000000000..ad2877eee --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ + +#ifndef __PANFROST_GEM_H__ +#define __PANFROST_GEM_H__ + +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_mm.h> + +struct panfrost_mmu; + +struct panfrost_gem_object { + struct drm_gem_shmem_object base; + struct sg_table *sgts; + + /* + * Use a list for now. If searching a mapping ever becomes the + * bottleneck, we should consider using an RB-tree, or even better, + * let the core store drm_gem_object_mapping entries (where we + * could place driver specific data) instead of drm_gem_object ones + * in its drm_file->object_idr table. + * + * struct drm_gem_object_mapping { + * struct drm_gem_object *obj; + * void *driver_priv; + * }; + */ + struct { + struct list_head list; + struct mutex lock; + } mappings; + + /* + * Count the number of jobs referencing this BO so we don't let the + * shrinker reclaim this object prematurely. + */ + atomic_t gpu_usecount; + + bool noexec :1; + bool is_heap :1; +}; + +struct panfrost_gem_mapping { + struct list_head node; + struct kref refcount; + struct panfrost_gem_object *obj; + struct drm_mm_node mmnode; + struct panfrost_mmu *mmu; + bool active :1; +}; + +static inline +struct panfrost_gem_object *to_panfrost_bo(struct drm_gem_object *obj) +{ + return container_of(to_drm_gem_shmem_obj(obj), struct panfrost_gem_object, base); +} + +static inline struct panfrost_gem_mapping * +drm_mm_node_to_panfrost_mapping(struct drm_mm_node *node) +{ + return container_of(node, struct panfrost_gem_mapping, mmnode); +} + +struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size); + +struct drm_gem_object * +panfrost_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); + +struct panfrost_gem_object * +panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); + +int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); +void panfrost_gem_close(struct drm_gem_object *obj, + struct drm_file *file_priv); + +struct panfrost_gem_mapping * +panfrost_gem_mapping_get(struct panfrost_gem_object *bo, + struct panfrost_file_priv *priv); +void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping); +void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo); + +void panfrost_gem_shrinker_init(struct drm_device *dev); +void panfrost_gem_shrinker_cleanup(struct drm_device *dev); + +#endif /* __PANFROST_GEM_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c new file mode 100644 index 000000000..1b9f68d8e --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Arm Ltd. + * + * Based on msm_gem_freedreno.c: + * Copyright (C) 2016 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + */ + +#include <linux/list.h> + +#include <drm/drm_device.h> +#include <drm/drm_gem_shmem_helper.h> + +#include "panfrost_device.h" +#include "panfrost_gem.h" +#include "panfrost_mmu.h" + +static unsigned long +panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct panfrost_device *pfdev = + container_of(shrinker, struct panfrost_device, shrinker); + struct drm_gem_shmem_object *shmem; + unsigned long count = 0; + + if (!mutex_trylock(&pfdev->shrinker_lock)) + return 0; + + list_for_each_entry(shmem, &pfdev->shrinker_list, madv_list) { + if (drm_gem_shmem_is_purgeable(shmem)) + count += shmem->base.size >> PAGE_SHIFT; + } + + mutex_unlock(&pfdev->shrinker_lock); + + return count; +} + +static bool panfrost_gem_purge(struct drm_gem_object *obj) +{ + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + bool ret = false; + + if (atomic_read(&bo->gpu_usecount)) + return false; + + if (!mutex_trylock(&bo->mappings.lock)) + return false; + + if (!mutex_trylock(&shmem->pages_lock)) + goto unlock_mappings; + + panfrost_gem_teardown_mappings_locked(bo); + drm_gem_shmem_purge_locked(obj); + ret = true; + + mutex_unlock(&shmem->pages_lock); + +unlock_mappings: + mutex_unlock(&bo->mappings.lock); + return ret; +} + +static unsigned long +panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct panfrost_device *pfdev = + container_of(shrinker, struct panfrost_device, shrinker); + struct drm_gem_shmem_object *shmem, *tmp; + unsigned long freed = 0; + + if (!mutex_trylock(&pfdev->shrinker_lock)) + return SHRINK_STOP; + + list_for_each_entry_safe(shmem, tmp, &pfdev->shrinker_list, madv_list) { + if (freed >= sc->nr_to_scan) + break; + if (drm_gem_shmem_is_purgeable(shmem) && + panfrost_gem_purge(&shmem->base)) { + freed += shmem->base.size >> PAGE_SHIFT; + list_del_init(&shmem->madv_list); + } + } + + mutex_unlock(&pfdev->shrinker_lock); + + if (freed > 0) + pr_info_ratelimited("Purging %lu bytes\n", freed << PAGE_SHIFT); + + return freed; +} + +/** + * panfrost_gem_shrinker_init - Initialize panfrost shrinker + * @dev: DRM device + * + * This function registers and sets up the panfrost shrinker. + */ +void panfrost_gem_shrinker_init(struct drm_device *dev) +{ + struct panfrost_device *pfdev = dev->dev_private; + pfdev->shrinker.count_objects = panfrost_gem_shrinker_count; + pfdev->shrinker.scan_objects = panfrost_gem_shrinker_scan; + pfdev->shrinker.seeks = DEFAULT_SEEKS; + WARN_ON(register_shrinker(&pfdev->shrinker)); +} + +/** + * panfrost_gem_shrinker_cleanup - Clean up panfrost shrinker + * @dev: DRM device + * + * This function unregisters the panfrost shrinker. + */ +void panfrost_gem_shrinker_cleanup(struct drm_device *dev) +{ + struct panfrost_device *pfdev = dev->dev_private; + + if (pfdev->shrinker.nr_deferred) { + unregister_shrinker(&pfdev->shrinker); + } +} diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c new file mode 100644 index 000000000..107ad2d76 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */ +/* Copyright 2019 Collabora ltd. */ +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#include "panfrost_device.h" +#include "panfrost_features.h" +#include "panfrost_issues.h" +#include "panfrost_gpu.h" +#include "panfrost_perfcnt.h" +#include "panfrost_regs.h" + +static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) +{ + struct panfrost_device *pfdev = data; + u32 state = gpu_read(pfdev, GPU_INT_STAT); + u32 fault_status = gpu_read(pfdev, GPU_FAULT_STATUS); + + if (!state) + return IRQ_NONE; + + if (state & GPU_IRQ_MASK_ERROR) { + u64 address = (u64) gpu_read(pfdev, GPU_FAULT_ADDRESS_HI) << 32; + address |= gpu_read(pfdev, GPU_FAULT_ADDRESS_LO); + + dev_warn(pfdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx\n", + fault_status & 0xFF, panfrost_exception_name(pfdev, fault_status), + address); + + if (state & GPU_IRQ_MULTIPLE_FAULT) + dev_warn(pfdev->dev, "There were multiple GPU faults - some have not been reported\n"); + + gpu_write(pfdev, GPU_INT_MASK, 0); + } + + if (state & GPU_IRQ_PERFCNT_SAMPLE_COMPLETED) + panfrost_perfcnt_sample_done(pfdev); + + if (state & GPU_IRQ_CLEAN_CACHES_COMPLETED) + panfrost_perfcnt_clean_cache_done(pfdev); + + gpu_write(pfdev, GPU_INT_CLEAR, state); + + return IRQ_HANDLED; +} + +int panfrost_gpu_soft_reset(struct panfrost_device *pfdev) +{ + int ret; + u32 val; + + gpu_write(pfdev, GPU_INT_MASK, 0); + gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); + gpu_write(pfdev, GPU_CMD, GPU_CMD_SOFT_RESET); + + ret = readl_relaxed_poll_timeout(pfdev->iomem + GPU_INT_RAWSTAT, + val, val & GPU_IRQ_RESET_COMPLETED, 100, 10000); + + if (ret) { + dev_err(pfdev->dev, "gpu soft reset timed out\n"); + return ret; + } + + gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL); + gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL); + + return 0; +} + +void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev) +{ + /* + * The Amlogic integrated Mali-T820, Mali-G31 & Mali-G52 needs + * these undocumented bits in GPU_PWR_OVERRIDE1 to be set in order + * to operate correctly. + */ + gpu_write(pfdev, GPU_PWR_KEY, GPU_PWR_KEY_UNLOCK); + gpu_write(pfdev, GPU_PWR_OVERRIDE1, 0xfff | (0x20 << 16)); +} + +static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev) +{ + u32 quirks = 0; + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8443) || + panfrost_has_hw_issue(pfdev, HW_ISSUE_11035)) + quirks |= SC_LS_PAUSEBUFFER_DISABLE; + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10327)) + quirks |= SC_SDC_DISABLE_OQ_DISCARD; + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10797)) + quirks |= SC_ENABLE_TEXGRD_FLAGS; + + if (!panfrost_has_hw_issue(pfdev, GPUCORE_1619)) { + if (panfrost_model_cmp(pfdev, 0x750) < 0) /* T60x, T62x, T72x */ + quirks |= SC_LS_ATTR_CHECK_DISABLE; + else if (panfrost_model_cmp(pfdev, 0x880) <= 0) /* T76x, T8xx */ + quirks |= SC_LS_ALLOW_ATTR_TYPES; + } + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_TLS_HASHING)) + quirks |= SC_TLS_HASH_ENABLE; + + if (quirks) + gpu_write(pfdev, GPU_SHADER_CONFIG, quirks); + + + quirks = gpu_read(pfdev, GPU_TILER_CONFIG); + + /* Set tiler clock gate override if required */ + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_T76X_3953)) + quirks |= TC_CLOCK_GATE_OVERRIDE; + + gpu_write(pfdev, GPU_TILER_CONFIG, quirks); + + + quirks = gpu_read(pfdev, GPU_L2_MMU_CONFIG); + + /* Limit read & write ID width for AXI */ + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) + quirks &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS | + L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES); + else + quirks &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS | + L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); + + gpu_write(pfdev, GPU_L2_MMU_CONFIG, quirks); + + quirks = 0; + if ((panfrost_model_eq(pfdev, 0x860) || panfrost_model_eq(pfdev, 0x880)) && + pfdev->features.revision >= 0x2000) + quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT; + else if (panfrost_model_eq(pfdev, 0x6000) && + pfdev->features.coherency_features == COHERENCY_ACE) + quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << + JM_FORCE_COHERENCY_FEATURES_SHIFT; + + if (quirks) + gpu_write(pfdev, GPU_JM_CONFIG, quirks); + + /* Here goes platform specific quirks */ + if (pfdev->comp->vendor_quirk) + pfdev->comp->vendor_quirk(pfdev); +} + +#define MAX_HW_REVS 6 + +struct panfrost_model { + const char *name; + u32 id; + u32 id_mask; + u64 features; + u64 issues; + struct { + u32 revision; + u64 issues; + } revs[MAX_HW_REVS]; +}; + +#define GPU_MODEL(_name, _id, ...) \ +{\ + .name = __stringify(_name), \ + .id = _id, \ + .features = hw_features_##_name, \ + .issues = hw_issues_##_name, \ + .revs = { __VA_ARGS__ }, \ +} + +#define GPU_REV_EXT(name, _rev, _p, _s, stat) \ +{\ + .revision = (_rev) << 12 | (_p) << 4 | (_s), \ + .issues = hw_issues_##name##_r##_rev##p##_p##stat, \ +} +#define GPU_REV(name, r, p) GPU_REV_EXT(name, r, p, 0, ) + +static const struct panfrost_model gpu_models[] = { + /* T60x has an oddball version */ + GPU_MODEL(t600, 0x600, + GPU_REV_EXT(t600, 0, 0, 1, _15dev0)), + GPU_MODEL(t620, 0x620, + GPU_REV(t620, 0, 1), GPU_REV(t620, 1, 0)), + GPU_MODEL(t720, 0x720), + GPU_MODEL(t760, 0x750, + GPU_REV(t760, 0, 0), GPU_REV(t760, 0, 1), + GPU_REV_EXT(t760, 0, 1, 0, _50rel0), + GPU_REV(t760, 0, 2), GPU_REV(t760, 0, 3)), + GPU_MODEL(t820, 0x820), + GPU_MODEL(t830, 0x830), + GPU_MODEL(t860, 0x860), + GPU_MODEL(t880, 0x880), + + GPU_MODEL(g71, 0x6000, + GPU_REV_EXT(g71, 0, 0, 1, _05dev0)), + GPU_MODEL(g72, 0x6001), + GPU_MODEL(g51, 0x7000), + GPU_MODEL(g76, 0x7001), + GPU_MODEL(g52, 0x7002), + GPU_MODEL(g31, 0x7003, + GPU_REV(g31, 1, 0)), +}; + +static void panfrost_gpu_init_features(struct panfrost_device *pfdev) +{ + u32 gpu_id, num_js, major, minor, status, rev; + const char *name = "unknown"; + u64 hw_feat = 0; + u64 hw_issues = hw_issues_all; + const struct panfrost_model *model; + int i; + + pfdev->features.l2_features = gpu_read(pfdev, GPU_L2_FEATURES); + pfdev->features.core_features = gpu_read(pfdev, GPU_CORE_FEATURES); + pfdev->features.tiler_features = gpu_read(pfdev, GPU_TILER_FEATURES); + pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES); + pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES); + pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES); + pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS); + pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE); + pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE); + pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + for (i = 0; i < 4; i++) + pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); + + pfdev->features.as_present = gpu_read(pfdev, GPU_AS_PRESENT); + + pfdev->features.js_present = gpu_read(pfdev, GPU_JS_PRESENT); + num_js = hweight32(pfdev->features.js_present); + for (i = 0; i < num_js; i++) + pfdev->features.js_features[i] = gpu_read(pfdev, GPU_JS_FEATURES(i)); + + pfdev->features.shader_present = gpu_read(pfdev, GPU_SHADER_PRESENT_LO); + pfdev->features.shader_present |= (u64)gpu_read(pfdev, GPU_SHADER_PRESENT_HI) << 32; + + pfdev->features.tiler_present = gpu_read(pfdev, GPU_TILER_PRESENT_LO); + pfdev->features.tiler_present |= (u64)gpu_read(pfdev, GPU_TILER_PRESENT_HI) << 32; + + pfdev->features.l2_present = gpu_read(pfdev, GPU_L2_PRESENT_LO); + pfdev->features.l2_present |= (u64)gpu_read(pfdev, GPU_L2_PRESENT_HI) << 32; + pfdev->features.nr_core_groups = hweight64(pfdev->features.l2_present); + + pfdev->features.stack_present = gpu_read(pfdev, GPU_STACK_PRESENT_LO); + pfdev->features.stack_present |= (u64)gpu_read(pfdev, GPU_STACK_PRESENT_HI) << 32; + + pfdev->features.thread_tls_alloc = gpu_read(pfdev, GPU_THREAD_TLS_ALLOC); + + gpu_id = gpu_read(pfdev, GPU_ID); + pfdev->features.revision = gpu_id & 0xffff; + pfdev->features.id = gpu_id >> 16; + + /* The T60x has an oddball ID value. Fix it up to the standard Midgard + * format so we (and userspace) don't have to special case it. + */ + if (pfdev->features.id == 0x6956) + pfdev->features.id = 0x0600; + + major = (pfdev->features.revision >> 12) & 0xf; + minor = (pfdev->features.revision >> 4) & 0xff; + status = pfdev->features.revision & 0xf; + rev = pfdev->features.revision; + + gpu_id = pfdev->features.id; + + for (model = gpu_models; model->name; model++) { + int best = -1; + + if (!panfrost_model_eq(pfdev, model->id)) + continue; + + name = model->name; + hw_feat = model->features; + hw_issues |= model->issues; + for (i = 0; i < MAX_HW_REVS; i++) { + if (model->revs[i].revision == rev) { + best = i; + break; + } else if (model->revs[i].revision == (rev & ~0xf)) + best = i; + } + + if (best >= 0) + hw_issues |= model->revs[best].issues; + + break; + } + + bitmap_from_u64(pfdev->features.hw_features, hw_feat); + bitmap_from_u64(pfdev->features.hw_issues, hw_issues); + + dev_info(pfdev->dev, "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x", + name, gpu_id, major, minor, status); + dev_info(pfdev->dev, "features: %64pb, issues: %64pb", + pfdev->features.hw_features, + pfdev->features.hw_issues); + + dev_info(pfdev->dev, "Features: L2:0x%08x Shader:0x%08x Tiler:0x%08x Mem:0x%0x MMU:0x%08x AS:0x%x JS:0x%x", + pfdev->features.l2_features, + pfdev->features.core_features, + pfdev->features.tiler_features, + pfdev->features.mem_features, + pfdev->features.mmu_features, + pfdev->features.as_present, + pfdev->features.js_present); + + dev_info(pfdev->dev, "shader_present=0x%0llx l2_present=0x%0llx", + pfdev->features.shader_present, pfdev->features.l2_present); +} + +void panfrost_gpu_power_on(struct panfrost_device *pfdev) +{ + int ret; + u32 val; + + panfrost_gpu_init_quirks(pfdev); + + /* Just turn on everything for now */ + gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO, + val, val == pfdev->features.l2_present, 100, 20000); + if (ret) + dev_err(pfdev->dev, "error powering up gpu L2"); + + gpu_write(pfdev, SHADER_PWRON_LO, pfdev->features.shader_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO, + val, val == pfdev->features.shader_present, 100, 20000); + if (ret) + dev_err(pfdev->dev, "error powering up gpu shader"); + + gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO, + val, val == pfdev->features.tiler_present, 100, 1000); + if (ret) + dev_err(pfdev->dev, "error powering up gpu tiler"); +} + +void panfrost_gpu_power_off(struct panfrost_device *pfdev) +{ + gpu_write(pfdev, TILER_PWROFF_LO, 0); + gpu_write(pfdev, SHADER_PWROFF_LO, 0); + gpu_write(pfdev, L2_PWROFF_LO, 0); +} + +int panfrost_gpu_init(struct panfrost_device *pfdev) +{ + int err, irq; + + err = panfrost_gpu_soft_reset(pfdev); + if (err) + return err; + + panfrost_gpu_init_features(pfdev); + + err = dma_set_mask_and_coherent(pfdev->dev, + DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features))); + if (err) + return err; + + dma_set_max_seg_size(pfdev->dev, UINT_MAX); + + irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); + if (irq <= 0) + return -ENODEV; + + err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler, + IRQF_SHARED, KBUILD_MODNAME "-gpu", pfdev); + if (err) { + dev_err(pfdev->dev, "failed to request gpu irq"); + return err; + } + + panfrost_gpu_power_on(pfdev); + + return 0; +} + +void panfrost_gpu_fini(struct panfrost_device *pfdev) +{ + panfrost_gpu_power_off(pfdev); +} + +u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev) +{ + u32 flush_id; + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) { + /* Flush reduction only makes sense when the GPU is kept powered on between jobs */ + if (pm_runtime_get_if_in_use(pfdev->dev)) { + flush_id = gpu_read(pfdev, GPU_LATEST_FLUSH_ID); + pm_runtime_put(pfdev->dev); + return flush_id; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h new file mode 100644 index 000000000..468c51e7e --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Collabora ltd. */ + +#ifndef __PANFROST_GPU_H__ +#define __PANFROST_GPU_H__ + +struct panfrost_device; + +int panfrost_gpu_init(struct panfrost_device *pfdev); +void panfrost_gpu_fini(struct panfrost_device *pfdev); + +u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev); + +int panfrost_gpu_soft_reset(struct panfrost_device *pfdev); +void panfrost_gpu_power_on(struct panfrost_device *pfdev); +void panfrost_gpu_power_off(struct panfrost_device *pfdev); + +void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_issues.h b/drivers/gpu/drm/panfrost/panfrost_issues.h new file mode 100644 index 000000000..8e59d765b --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_issues.h @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. */ +/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */ +#ifndef __PANFROST_ISSUES_H__ +#define __PANFROST_ISSUES_H__ + +#include <linux/bitops.h> + +#include "panfrost_device.h" + +/* + * This is not a complete list of issues, but only the ones the driver needs + * to care about. + */ +enum panfrost_hw_issue { + /* Need way to guarantee that all previously-translated memory accesses + * are commited */ + HW_ISSUE_6367, + + /* On job complete with non-done the cache is not flushed */ + HW_ISSUE_6787, + + /* Write of PRFCNT_CONFIG_MODE_MANUAL to PRFCNT_CONFIG causes a + * instrumentation dump if PRFCNT_TILER_EN is enabled */ + HW_ISSUE_8186, + + /* TIB: Reports faults from a vtile which has not yet been allocated */ + HW_ISSUE_8245, + + /* uTLB deadlock could occur when writing to an invalid page at the + * same time as access to a valid page in the same uTLB cache line ( == + * 4 PTEs == 16K block of mapping) */ + HW_ISSUE_8316, + + /* HT: TERMINATE for RUN command ignored if previous LOAD_DESCRIPTOR is + * still executing */ + HW_ISSUE_8394, + + /* CSE: Sends a TERMINATED response for a task that should not be + * terminated */ + HW_ISSUE_8401, + + /* Repeatedly Soft-stopping a job chain consisting of (Vertex Shader, + * Cache Flush, Tiler) jobs causes DATA_INVALID_FAULT on tiler job. */ + HW_ISSUE_8408, + + /* Disable the Pause Buffer in the LS pipe. */ + HW_ISSUE_8443, + + /* Change in RMUs in use causes problems related with the core's SDC */ + HW_ISSUE_8987, + + /* Compute endpoint has a 4-deep queue of tasks, meaning a soft stop + * won't complete until all 4 tasks have completed */ + HW_ISSUE_9435, + + /* HT: Tiler returns TERMINATED for non-terminated command */ + HW_ISSUE_9510, + + /* Occasionally the GPU will issue multiple page faults for the same + * address before the MMU page table has been read by the GPU */ + HW_ISSUE_9630, + + /* RA DCD load request to SDC returns invalid load ignore causing + * colour buffer mismatch */ + HW_ISSUE_10327, + + /* MMU TLB invalidation hazards */ + HW_ISSUE_10649, + + /* Missing cache flush in multi core-group configuration */ + HW_ISSUE_10676, + + /* Chicken bit on T72X for a hardware workaround in compiler */ + HW_ISSUE_10797, + + /* Soft-stopping fragment jobs might fail with TILE_RANGE_FAULT */ + HW_ISSUE_10817, + + /* Intermittent missing interrupt on job completion */ + HW_ISSUE_10883, + + /* Soft-stopping fragment jobs might fail with TILE_RANGE_ERROR + * (similar to issue 10817) and can use #10817 workaround */ + HW_ISSUE_10959, + + /* Soft-stopped fragment shader job can restart with out-of-bound + * restart index */ + HW_ISSUE_10969, + + /* Race condition can cause tile list corruption */ + HW_ISSUE_11020, + + /* Write buffer can cause tile list corruption */ + HW_ISSUE_11024, + + /* Pause buffer can cause a fragment job hang */ + HW_ISSUE_11035, + + /* Dynamic Core Scaling not supported due to errata */ + HW_ISSUE_11056, + + /* Clear encoder state for a hard stopped fragment job which is AFBC + * encoded by soft resetting the GPU. Only for T76X r0p0, r0p1 and + * r0p1_50rel0 */ + HW_ISSUE_T76X_3542, + + /* Keep tiler module clock on to prevent GPU stall */ + HW_ISSUE_T76X_3953, + + /* Must ensure L2 is not transitioning when we reset. Workaround with a + * busy wait until L2 completes transition; ensure there is a maximum + * loop count as she may never complete her transition. (On chips + * without this errata, it's totally okay if L2 transitions.) */ + HW_ISSUE_TMIX_8463, + + /* Don't set SC_LS_ATTR_CHECK_DISABLE/SC_LS_ALLOW_ATTR_TYPES */ + GPUCORE_1619, + + /* When a hard-stop follows close after a soft-stop, the completion + * code for the terminated job may be incorrectly set to STOPPED */ + HW_ISSUE_TMIX_8438, + + /* "Protected mode" is buggy on Mali-G31 some Bifrost chips, so the + * kernel must fiddle with L2 caches to prevent data leakage */ + HW_ISSUE_TGOX_R1_1234, + + HW_ISSUE_END +}; + +#define hw_issues_all (\ + BIT_ULL(HW_ISSUE_9435)) + +#define hw_issues_t600 (\ + BIT_ULL(HW_ISSUE_6367) | \ + BIT_ULL(HW_ISSUE_6787) | \ + BIT_ULL(HW_ISSUE_8408) | \ + BIT_ULL(HW_ISSUE_9510) | \ + BIT_ULL(HW_ISSUE_10649) | \ + BIT_ULL(HW_ISSUE_10676) | \ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11035) | \ + BIT_ULL(HW_ISSUE_11056) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t600_r0p0_15dev0 (\ + BIT_ULL(HW_ISSUE_8186) | \ + BIT_ULL(HW_ISSUE_8245) | \ + BIT_ULL(HW_ISSUE_8316) | \ + BIT_ULL(HW_ISSUE_8394) | \ + BIT_ULL(HW_ISSUE_8401) | \ + BIT_ULL(HW_ISSUE_8443) | \ + BIT_ULL(HW_ISSUE_8987) | \ + BIT_ULL(HW_ISSUE_9630) | \ + BIT_ULL(HW_ISSUE_10969) | \ + BIT_ULL(GPUCORE_1619)) + +#define hw_issues_t620 (\ + BIT_ULL(HW_ISSUE_10649) | \ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_10959) | \ + BIT_ULL(HW_ISSUE_11056) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t620_r0p1 (\ + BIT_ULL(HW_ISSUE_10327) | \ + BIT_ULL(HW_ISSUE_10676) | \ + BIT_ULL(HW_ISSUE_10817) | \ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11024) | \ + BIT_ULL(HW_ISSUE_11035)) + +#define hw_issues_t620_r1p0 (\ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11024)) + +#define hw_issues_t720 (\ + BIT_ULL(HW_ISSUE_10649) | \ + BIT_ULL(HW_ISSUE_10797) | \ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_11056) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t760 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t760_r0p0 (\ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11024) | \ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t760_r0p1 (\ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11024) | \ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t760_r0p1_50rel0 (\ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t760_r0p2 (\ + BIT_ULL(HW_ISSUE_11020) | \ + BIT_ULL(HW_ISSUE_11024) | \ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t760_r0p3 (\ + BIT_ULL(HW_ISSUE_T76X_3542)) + +#define hw_issues_t820 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t830 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t860 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_t880 (\ + BIT_ULL(HW_ISSUE_10883) | \ + BIT_ULL(HW_ISSUE_T76X_3953) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_g31 0 + +#define hw_issues_g31_r1p0 (\ + BIT_ULL(HW_ISSUE_TGOX_R1_1234)) + +#define hw_issues_g51 0 + +#define hw_issues_g52 0 + +#define hw_issues_g71 (\ + BIT_ULL(HW_ISSUE_TMIX_8463) | \ + BIT_ULL(HW_ISSUE_TMIX_8438)) + +#define hw_issues_g71_r0p0_05dev0 (\ + BIT_ULL(HW_ISSUE_T76X_3953)) + +#define hw_issues_g72 0 + +#define hw_issues_g76 0 + +static inline bool panfrost_has_hw_issue(struct panfrost_device *pfdev, + enum panfrost_hw_issue issue) +{ + return test_bit(issue, pfdev->features.hw_issues); +} + +#endif /* __PANFROST_ISSUES_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c new file mode 100644 index 000000000..7e1a5664d --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -0,0 +1,697 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* Copyright 2019 Collabora ltd. */ +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/dma-resv.h> +#include <drm/gpu_scheduler.h> +#include <drm/panfrost_drm.h> + +#include "panfrost_device.h" +#include "panfrost_devfreq.h" +#include "panfrost_job.h" +#include "panfrost_features.h" +#include "panfrost_issues.h" +#include "panfrost_gem.h" +#include "panfrost_regs.h" +#include "panfrost_gpu.h" +#include "panfrost_mmu.h" + +#define JOB_TIMEOUT_MS 500 + +#define job_write(dev, reg, data) writel(data, dev->iomem + (reg)) +#define job_read(dev, reg) readl(dev->iomem + (reg)) + +enum panfrost_queue_status { + PANFROST_QUEUE_STATUS_ACTIVE, + PANFROST_QUEUE_STATUS_STOPPED, + PANFROST_QUEUE_STATUS_STARTING, + PANFROST_QUEUE_STATUS_FAULT_PENDING, +}; + +struct panfrost_queue_state { + struct drm_gpu_scheduler sched; + atomic_t status; + struct mutex lock; + u64 fence_context; + u64 emit_seqno; +}; + +struct panfrost_job_slot { + struct panfrost_queue_state queue[NUM_JOB_SLOTS]; + spinlock_t job_lock; +}; + +static struct panfrost_job * +to_panfrost_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct panfrost_job, base); +} + +struct panfrost_fence { + struct dma_fence base; + struct drm_device *dev; + /* panfrost seqno for signaled() test */ + u64 seqno; + int queue; +}; + +static inline struct panfrost_fence * +to_panfrost_fence(struct dma_fence *fence) +{ + return (struct panfrost_fence *)fence; +} + +static const char *panfrost_fence_get_driver_name(struct dma_fence *fence) +{ + return "panfrost"; +} + +static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence) +{ + struct panfrost_fence *f = to_panfrost_fence(fence); + + switch (f->queue) { + case 0: + return "panfrost-js-0"; + case 1: + return "panfrost-js-1"; + case 2: + return "panfrost-js-2"; + default: + return NULL; + } +} + +static const struct dma_fence_ops panfrost_fence_ops = { + .get_driver_name = panfrost_fence_get_driver_name, + .get_timeline_name = panfrost_fence_get_timeline_name, +}; + +static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num) +{ + struct panfrost_fence *fence; + struct panfrost_job_slot *js = pfdev->js; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->dev = pfdev->ddev; + fence->queue = js_num; + fence->seqno = ++js->queue[js_num].emit_seqno; + dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock, + js->queue[js_num].fence_context, fence->seqno); + + return &fence->base; +} + +static int panfrost_job_get_slot(struct panfrost_job *job) +{ + /* JS0: fragment jobs. + * JS1: vertex/tiler jobs + * JS2: compute jobs + */ + if (job->requirements & PANFROST_JD_REQ_FS) + return 0; + +/* Not exposed to userspace yet */ +#if 0 + if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) { + if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) && + (job->pfdev->features.nr_core_groups == 2)) + return 2; + if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987)) + return 2; + } +#endif + return 1; +} + +static void panfrost_job_write_affinity(struct panfrost_device *pfdev, + u32 requirements, + int js) +{ + u64 affinity; + + /* + * Use all cores for now. + * Eventually we may need to support tiler only jobs and h/w with + * multiple (2) coherent core groups + */ + affinity = pfdev->features.shader_present; + + job_write(pfdev, JS_AFFINITY_NEXT_LO(js), affinity & 0xFFFFFFFF); + job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32); +} + +static void panfrost_job_hw_submit(struct panfrost_job *job, int js) +{ + struct panfrost_device *pfdev = job->pfdev; + u32 cfg; + u64 jc_head = job->jc; + int ret; + + panfrost_devfreq_record_busy(&pfdev->pfdevfreq); + + ret = pm_runtime_get_sync(pfdev->dev); + if (ret < 0) + return; + + if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) { + return; + } + + cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu); + + job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF); + job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32); + + panfrost_job_write_affinity(pfdev, job->requirements, js); + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on + * start */ + cfg |= JS_CONFIG_THREAD_PRI(8) | + JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | + JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649)) + cfg |= JS_CONFIG_START_MMU; + + job_write(pfdev, JS_CONFIG_NEXT(js), cfg); + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) + job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id); + + /* GO ! */ + dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d] with head=0x%llx", + job, js, jc_head); + + job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); +} + +static void panfrost_acquire_object_fences(struct drm_gem_object **bos, + int bo_count, + struct dma_fence **implicit_fences) +{ + int i; + + for (i = 0; i < bo_count; i++) + implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv); +} + +static void panfrost_attach_object_fences(struct drm_gem_object **bos, + int bo_count, + struct dma_fence *fence) +{ + int i; + + for (i = 0; i < bo_count; i++) + dma_resv_add_excl_fence(bos[i]->resv, fence); +} + +int panfrost_job_push(struct panfrost_job *job) +{ + struct panfrost_device *pfdev = job->pfdev; + int slot = panfrost_job_get_slot(job); + struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot]; + struct ww_acquire_ctx acquire_ctx; + int ret = 0; + + mutex_lock(&pfdev->sched_lock); + + ret = drm_gem_lock_reservations(job->bos, job->bo_count, + &acquire_ctx); + if (ret) { + mutex_unlock(&pfdev->sched_lock); + return ret; + } + + ret = drm_sched_job_init(&job->base, entity, NULL); + if (ret) { + mutex_unlock(&pfdev->sched_lock); + goto unlock; + } + + job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); + + kref_get(&job->refcount); /* put by scheduler job completion */ + + panfrost_acquire_object_fences(job->bos, job->bo_count, + job->implicit_fences); + + drm_sched_entity_push_job(&job->base, entity); + + mutex_unlock(&pfdev->sched_lock); + + panfrost_attach_object_fences(job->bos, job->bo_count, + job->render_done_fence); + +unlock: + drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx); + + return ret; +} + +static void panfrost_job_cleanup(struct kref *ref) +{ + struct panfrost_job *job = container_of(ref, struct panfrost_job, + refcount); + unsigned int i; + + if (job->in_fences) { + for (i = 0; i < job->in_fence_count; i++) + dma_fence_put(job->in_fences[i]); + kvfree(job->in_fences); + } + if (job->implicit_fences) { + for (i = 0; i < job->bo_count; i++) + dma_fence_put(job->implicit_fences[i]); + kvfree(job->implicit_fences); + } + dma_fence_put(job->done_fence); + dma_fence_put(job->render_done_fence); + + if (job->mappings) { + for (i = 0; i < job->bo_count; i++) { + if (!job->mappings[i]) + break; + + atomic_dec(&job->mappings[i]->obj->gpu_usecount); + panfrost_gem_mapping_put(job->mappings[i]); + } + kvfree(job->mappings); + } + + if (job->bos) { + for (i = 0; i < job->bo_count; i++) + drm_gem_object_put(job->bos[i]); + + kvfree(job->bos); + } + + kfree(job); +} + +void panfrost_job_put(struct panfrost_job *job) +{ + kref_put(&job->refcount, panfrost_job_cleanup); +} + +static void panfrost_job_free(struct drm_sched_job *sched_job) +{ + struct panfrost_job *job = to_panfrost_job(sched_job); + + drm_sched_job_cleanup(sched_job); + + panfrost_job_put(job); +} + +static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) +{ + struct panfrost_job *job = to_panfrost_job(sched_job); + struct dma_fence *fence; + unsigned int i; + + /* Explicit fences */ + for (i = 0; i < job->in_fence_count; i++) { + if (job->in_fences[i]) { + fence = job->in_fences[i]; + job->in_fences[i] = NULL; + return fence; + } + } + + /* Implicit fences, max. one per BO */ + for (i = 0; i < job->bo_count; i++) { + if (job->implicit_fences[i]) { + fence = job->implicit_fences[i]; + job->implicit_fences[i] = NULL; + return fence; + } + } + + return NULL; +} + +static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) +{ + struct panfrost_job *job = to_panfrost_job(sched_job); + struct panfrost_device *pfdev = job->pfdev; + int slot = panfrost_job_get_slot(job); + struct dma_fence *fence = NULL; + + if (unlikely(job->base.s_fence->finished.error)) + return NULL; + + pfdev->jobs[slot] = job; + + fence = panfrost_fence_create(pfdev, slot); + if (IS_ERR(fence)) + return NULL; + + if (job->done_fence) + dma_fence_put(job->done_fence); + job->done_fence = dma_fence_get(fence); + + panfrost_job_hw_submit(job, slot); + + return fence; +} + +void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) +{ + int j; + u32 irq_mask = 0; + + for (j = 0; j < NUM_JOB_SLOTS; j++) { + irq_mask |= MK_JS_MASK(j); + } + + job_write(pfdev, JOB_INT_CLEAR, irq_mask); + job_write(pfdev, JOB_INT_MASK, irq_mask); +} + +static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue, + struct drm_sched_job *bad) +{ + enum panfrost_queue_status old_status; + bool stopped = false; + + mutex_lock(&queue->lock); + old_status = atomic_xchg(&queue->status, + PANFROST_QUEUE_STATUS_STOPPED); + if (old_status == PANFROST_QUEUE_STATUS_STOPPED) + goto out; + + WARN_ON(old_status != PANFROST_QUEUE_STATUS_ACTIVE); + drm_sched_stop(&queue->sched, bad); + if (bad) + drm_sched_increase_karma(bad); + + stopped = true; + + /* + * Set the timeout to max so the timer doesn't get started + * when we return from the timeout handler (restored in + * panfrost_scheduler_start()). + */ + queue->sched.timeout = MAX_SCHEDULE_TIMEOUT; + +out: + mutex_unlock(&queue->lock); + + return stopped; +} + +static void panfrost_scheduler_start(struct panfrost_queue_state *queue) +{ + enum panfrost_queue_status old_status; + + mutex_lock(&queue->lock); + old_status = atomic_xchg(&queue->status, + PANFROST_QUEUE_STATUS_STARTING); + WARN_ON(old_status != PANFROST_QUEUE_STATUS_STOPPED); + + /* Restore the original timeout before starting the scheduler. */ + queue->sched.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS); + drm_sched_resubmit_jobs(&queue->sched); + drm_sched_start(&queue->sched, true); + old_status = atomic_xchg(&queue->status, + PANFROST_QUEUE_STATUS_ACTIVE); + if (old_status == PANFROST_QUEUE_STATUS_FAULT_PENDING) + drm_sched_fault(&queue->sched); + + mutex_unlock(&queue->lock); +} + +static void panfrost_job_timedout(struct drm_sched_job *sched_job) +{ + struct panfrost_job *job = to_panfrost_job(sched_job); + struct panfrost_device *pfdev = job->pfdev; + int js = panfrost_job_get_slot(job); + + /* + * If the GPU managed to complete this jobs fence, the timeout is + * spurious. Bail out. + */ + if (dma_fence_is_signaled(job->done_fence)) + return; + + dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", + js, + job_read(pfdev, JS_CONFIG(js)), + job_read(pfdev, JS_STATUS(js)), + job_read(pfdev, JS_HEAD_LO(js)), + job_read(pfdev, JS_TAIL_LO(js)), + sched_job); + + /* Scheduler is already stopped, nothing to do. */ + if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job)) + return; + + /* Schedule a reset if there's no reset in progress. */ + if (!atomic_xchg(&pfdev->reset.pending, 1)) + schedule_work(&pfdev->reset.work); +} + +static const struct drm_sched_backend_ops panfrost_sched_ops = { + .dependency = panfrost_job_dependency, + .run_job = panfrost_job_run, + .timedout_job = panfrost_job_timedout, + .free_job = panfrost_job_free +}; + +static irqreturn_t panfrost_job_irq_handler(int irq, void *data) +{ + struct panfrost_device *pfdev = data; + u32 status = job_read(pfdev, JOB_INT_STAT); + int j; + + dev_dbg(pfdev->dev, "jobslot irq status=%x\n", status); + + if (!status) + return IRQ_NONE; + + pm_runtime_mark_last_busy(pfdev->dev); + + for (j = 0; status; j++) { + u32 mask = MK_JS_MASK(j); + + if (!(status & mask)) + continue; + + job_write(pfdev, JOB_INT_CLEAR, mask); + + if (status & JOB_INT_MASK_ERR(j)) { + enum panfrost_queue_status old_status; + + job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); + + dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", + j, + panfrost_exception_name(pfdev, job_read(pfdev, JS_STATUS(j))), + job_read(pfdev, JS_HEAD_LO(j)), + job_read(pfdev, JS_TAIL_LO(j))); + + /* + * When the queue is being restarted we don't report + * faults directly to avoid races between the timeout + * and reset handlers. panfrost_scheduler_start() will + * call drm_sched_fault() after the queue has been + * started if status == FAULT_PENDING. + */ + old_status = atomic_cmpxchg(&pfdev->js->queue[j].status, + PANFROST_QUEUE_STATUS_STARTING, + PANFROST_QUEUE_STATUS_FAULT_PENDING); + if (old_status == PANFROST_QUEUE_STATUS_ACTIVE) + drm_sched_fault(&pfdev->js->queue[j].sched); + } + + if (status & JOB_INT_MASK_DONE(j)) { + struct panfrost_job *job; + + spin_lock(&pfdev->js->job_lock); + job = pfdev->jobs[j]; + /* Only NULL if job timeout occurred */ + if (job) { + pfdev->jobs[j] = NULL; + + panfrost_mmu_as_put(pfdev, job->file_priv->mmu); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); + + dma_fence_signal_locked(job->done_fence); + pm_runtime_put_autosuspend(pfdev->dev); + } + spin_unlock(&pfdev->js->job_lock); + } + + status &= ~mask; + } + + return IRQ_HANDLED; +} + +static void panfrost_reset(struct work_struct *work) +{ + struct panfrost_device *pfdev = container_of(work, + struct panfrost_device, + reset.work); + unsigned long flags; + unsigned int i; + bool cookie; + + cookie = dma_fence_begin_signalling(); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + /* + * We want pending timeouts to be handled before we attempt + * to stop the scheduler. If we don't do that and the timeout + * handler is in flight, it might have removed the bad job + * from the list, and we'll lose this job if the reset handler + * enters the critical section in panfrost_scheduler_stop() + * before the timeout handler. + * + * Timeout is set to MAX_SCHEDULE_TIMEOUT - 1 because we need + * something big enough to make sure the timer will not expire + * before we manage to stop the scheduler, but we can't use + * MAX_SCHEDULE_TIMEOUT because drm_sched_get_cleanup_job() + * considers that as 'timer is not running' and will dequeue + * the job without making sure the timeout handler is not + * running. + */ + pfdev->js->queue[i].sched.timeout = MAX_SCHEDULE_TIMEOUT - 1; + cancel_delayed_work_sync(&pfdev->js->queue[i].sched.work_tdr); + panfrost_scheduler_stop(&pfdev->js->queue[i], NULL); + } + + /* All timers have been stopped, we can safely reset the pending state. */ + atomic_set(&pfdev->reset.pending, 0); + + spin_lock_irqsave(&pfdev->js->job_lock, flags); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + if (pfdev->jobs[i]) { + pm_runtime_put_noidle(pfdev->dev); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); + pfdev->jobs[i] = NULL; + } + } + spin_unlock_irqrestore(&pfdev->js->job_lock, flags); + + panfrost_device_reset(pfdev); + + for (i = 0; i < NUM_JOB_SLOTS; i++) + panfrost_scheduler_start(&pfdev->js->queue[i]); + + dma_fence_end_signalling(cookie); +} + +int panfrost_job_init(struct panfrost_device *pfdev) +{ + struct panfrost_job_slot *js; + int ret, j, irq; + + INIT_WORK(&pfdev->reset.work, panfrost_reset); + + pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL); + if (!js) + return -ENOMEM; + + spin_lock_init(&js->job_lock); + + irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); + if (irq <= 0) + return -ENODEV; + + ret = devm_request_irq(pfdev->dev, irq, panfrost_job_irq_handler, + IRQF_SHARED, KBUILD_MODNAME "-job", pfdev); + if (ret) { + dev_err(pfdev->dev, "failed to request job irq"); + return ret; + } + + for (j = 0; j < NUM_JOB_SLOTS; j++) { + mutex_init(&js->queue[j].lock); + + js->queue[j].fence_context = dma_fence_context_alloc(1); + + ret = drm_sched_init(&js->queue[j].sched, + &panfrost_sched_ops, + 1, 0, msecs_to_jiffies(JOB_TIMEOUT_MS), + "pan_js"); + if (ret) { + dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); + goto err_sched; + } + } + + panfrost_job_enable_interrupts(pfdev); + + return 0; + +err_sched: + for (j--; j >= 0; j--) + drm_sched_fini(&js->queue[j].sched); + + return ret; +} + +void panfrost_job_fini(struct panfrost_device *pfdev) +{ + struct panfrost_job_slot *js = pfdev->js; + int j; + + job_write(pfdev, JOB_INT_MASK, 0); + + for (j = 0; j < NUM_JOB_SLOTS; j++) { + drm_sched_fini(&js->queue[j].sched); + mutex_destroy(&js->queue[j].lock); + } + +} + +int panfrost_job_open(struct panfrost_file_priv *panfrost_priv) +{ + struct panfrost_device *pfdev = panfrost_priv->pfdev; + struct panfrost_job_slot *js = pfdev->js; + struct drm_gpu_scheduler *sched; + int ret, i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + sched = &js->queue[i].sched; + ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i], + DRM_SCHED_PRIORITY_NORMAL, &sched, + 1, NULL); + if (WARN_ON(ret)) + return ret; + } + return 0; +} + +void panfrost_job_close(struct panfrost_file_priv *panfrost_priv) +{ + int i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]); +} + +int panfrost_job_is_idle(struct panfrost_device *pfdev) +{ + struct panfrost_job_slot *js = pfdev->js; + int i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + /* If there are any jobs in the HW queue, we're not idle */ + if (atomic_read(&js->queue[i].sched.hw_rq_count)) + return false; + } + + return true; +} diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h new file mode 100644 index 000000000..bbd3ba97f --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_job.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Collabora ltd. */ + +#ifndef __PANFROST_JOB_H__ +#define __PANFROST_JOB_H__ + +#include <uapi/drm/panfrost_drm.h> +#include <drm/gpu_scheduler.h> + +struct panfrost_device; +struct panfrost_gem_object; +struct panfrost_file_priv; + +struct panfrost_job { + struct drm_sched_job base; + + struct kref refcount; + + struct panfrost_device *pfdev; + struct panfrost_file_priv *file_priv; + + /* Optional fences userspace can pass in for the job to depend on. */ + struct dma_fence **in_fences; + u32 in_fence_count; + + /* Fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *done_fence; + + __u64 jc; + __u32 requirements; + __u32 flush_id; + + /* Exclusive fences we have taken from the BOs to wait for */ + struct dma_fence **implicit_fences; + struct panfrost_gem_mapping **mappings; + struct drm_gem_object **bos; + u32 bo_count; + + /* Fence to be signaled by drm-sched once its done with the job */ + struct dma_fence *render_done_fence; +}; + +int panfrost_job_init(struct panfrost_device *pfdev); +void panfrost_job_fini(struct panfrost_device *pfdev); +int panfrost_job_open(struct panfrost_file_priv *panfrost_priv); +void panfrost_job_close(struct panfrost_file_priv *panfrost_priv); +int panfrost_job_push(struct panfrost_job *job); +void panfrost_job_put(struct panfrost_job *job); +void panfrost_job_enable_interrupts(struct panfrost_device *pfdev); +int panfrost_job_is_idle(struct panfrost_device *pfdev); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c new file mode 100644 index 000000000..c81f3ac22 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -0,0 +1,715 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ + +#include <drm/panfrost_drm.h> + +#include <linux/atomic.h> +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/io-pgtable.h> +#include <linux/iommu.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/shmem_fs.h> +#include <linux/sizes.h> + +#include "panfrost_device.h" +#include "panfrost_mmu.h" +#include "panfrost_gem.h" +#include "panfrost_features.h" +#include "panfrost_regs.h" + +#define mmu_write(dev, reg, data) writel(data, dev->iomem + reg) +#define mmu_read(dev, reg) readl(dev->iomem + reg) + +static int wait_ready(struct panfrost_device *pfdev, u32 as_nr) +{ + int ret; + u32 val; + + /* Wait for the MMU status to indicate there is no active command, in + * case one is pending. */ + ret = readl_relaxed_poll_timeout_atomic(pfdev->iomem + AS_STATUS(as_nr), + val, !(val & AS_STATUS_AS_ACTIVE), 10, 1000); + + if (ret) + dev_err(pfdev->dev, "AS_ACTIVE bit stuck\n"); + + return ret; +} + +static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd) +{ + int status; + + /* write AS_COMMAND when MMU is ready to accept another command */ + status = wait_ready(pfdev, as_nr); + if (!status) + mmu_write(pfdev, AS_COMMAND(as_nr), cmd); + + return status; +} + +static void lock_region(struct panfrost_device *pfdev, u32 as_nr, + u64 iova, u64 size) +{ + u8 region_width; + u64 region = iova & PAGE_MASK; + + /* The size is encoded as ceil(log2) minus(1), which may be calculated + * with fls. The size must be clamped to hardware bounds. + */ + size = max_t(u64, size, AS_LOCK_REGION_MIN_SIZE); + region_width = fls64(size - 1) - 1; + region |= region_width; + + /* Lock the region that needs to be updated */ + mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), region & 0xFFFFFFFFUL); + mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), (region >> 32) & 0xFFFFFFFFUL); + write_cmd(pfdev, as_nr, AS_COMMAND_LOCK); +} + + +static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr, + u64 iova, u64 size, u32 op) +{ + if (as_nr < 0) + return 0; + + if (op != AS_COMMAND_UNLOCK) + lock_region(pfdev, as_nr, iova, size); + + /* Run the MMU operation */ + write_cmd(pfdev, as_nr, op); + + /* Wait for the flush to complete */ + return wait_ready(pfdev, as_nr); +} + +static int mmu_hw_do_operation(struct panfrost_device *pfdev, + struct panfrost_mmu *mmu, + u64 iova, u64 size, u32 op) +{ + int ret; + + spin_lock(&pfdev->as_lock); + ret = mmu_hw_do_operation_locked(pfdev, mmu->as, iova, size, op); + spin_unlock(&pfdev->as_lock); + return ret; +} + +static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) +{ + int as_nr = mmu->as; + struct io_pgtable_cfg *cfg = &mmu->pgtbl_cfg; + u64 transtab = cfg->arm_mali_lpae_cfg.transtab; + u64 memattr = cfg->arm_mali_lpae_cfg.memattr; + + mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); + + mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL); + mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32); + + /* Need to revisit mem attrs. + * NC is the default, Mali driver is inner WT. + */ + mmu_write(pfdev, AS_MEMATTR_LO(as_nr), memattr & 0xffffffffUL); + mmu_write(pfdev, AS_MEMATTR_HI(as_nr), memattr >> 32); + + write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); +} + +static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr) +{ + mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); + + mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0); + mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0); + + mmu_write(pfdev, AS_MEMATTR_LO(as_nr), 0); + mmu_write(pfdev, AS_MEMATTR_HI(as_nr), 0); + + write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); +} + +u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) +{ + int as; + + spin_lock(&pfdev->as_lock); + + as = mmu->as; + if (as >= 0) { + int en = atomic_inc_return(&mmu->as_count); + + /* + * AS can be retained by active jobs or a perfcnt context, + * hence the '+ 1' here. + */ + WARN_ON(en >= (NUM_JOB_SLOTS + 1)); + + list_move(&mmu->list, &pfdev->as_lru_list); + goto out; + } + + /* Check for a free AS */ + as = ffz(pfdev->as_alloc_mask); + if (!(BIT(as) & pfdev->features.as_present)) { + struct panfrost_mmu *lru_mmu; + + list_for_each_entry_reverse(lru_mmu, &pfdev->as_lru_list, list) { + if (!atomic_read(&lru_mmu->as_count)) + break; + } + WARN_ON(&lru_mmu->list == &pfdev->as_lru_list); + + list_del_init(&lru_mmu->list); + as = lru_mmu->as; + + WARN_ON(as < 0); + lru_mmu->as = -1; + } + + /* Assign the free or reclaimed AS to the FD */ + mmu->as = as; + set_bit(as, &pfdev->as_alloc_mask); + atomic_set(&mmu->as_count, 1); + list_add(&mmu->list, &pfdev->as_lru_list); + + dev_dbg(pfdev->dev, "Assigned AS%d to mmu %p, alloc_mask=%lx", as, mmu, pfdev->as_alloc_mask); + + panfrost_mmu_enable(pfdev, mmu); + +out: + spin_unlock(&pfdev->as_lock); + return as; +} + +void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) +{ + atomic_dec(&mmu->as_count); + WARN_ON(atomic_read(&mmu->as_count) < 0); +} + +void panfrost_mmu_reset(struct panfrost_device *pfdev) +{ + struct panfrost_mmu *mmu, *mmu_tmp; + + spin_lock(&pfdev->as_lock); + + pfdev->as_alloc_mask = 0; + + list_for_each_entry_safe(mmu, mmu_tmp, &pfdev->as_lru_list, list) { + mmu->as = -1; + atomic_set(&mmu->as_count, 0); + list_del_init(&mmu->list); + } + + spin_unlock(&pfdev->as_lock); + + mmu_write(pfdev, MMU_INT_CLEAR, ~0); + mmu_write(pfdev, MMU_INT_MASK, ~0); +} + +static size_t get_pgsize(u64 addr, size_t size) +{ + if (addr & (SZ_2M - 1) || size < SZ_2M) + return SZ_4K; + + return SZ_2M; +} + +static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, + struct panfrost_mmu *mmu, + u64 iova, u64 size) +{ + if (mmu->as < 0) + return; + + pm_runtime_get_noresume(pfdev->dev); + + /* Flush the PTs only if we're already awake */ + if (pm_runtime_active(pfdev->dev)) + mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT); + + pm_runtime_put_autosuspend(pfdev->dev); +} + +static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, + u64 iova, int prot, struct sg_table *sgt) +{ + unsigned int count; + struct scatterlist *sgl; + struct io_pgtable_ops *ops = mmu->pgtbl_ops; + u64 start_iova = iova; + + for_each_sgtable_dma_sg(sgt, sgl, count) { + unsigned long paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + dev_dbg(pfdev->dev, "map: as=%d, iova=%llx, paddr=%lx, len=%zx", mmu->as, iova, paddr, len); + + while (len) { + size_t pgsize = get_pgsize(iova | paddr, len); + + ops->map(ops, iova, paddr, pgsize, prot, GFP_KERNEL); + iova += pgsize; + paddr += pgsize; + len -= pgsize; + } + } + + panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova); + + return 0; +} + +int panfrost_mmu_map(struct panfrost_gem_mapping *mapping) +{ + struct panfrost_gem_object *bo = mapping->obj; + struct drm_gem_object *obj = &bo->base.base; + struct panfrost_device *pfdev = to_panfrost_device(obj->dev); + struct sg_table *sgt; + int prot = IOMMU_READ | IOMMU_WRITE; + + if (WARN_ON(mapping->active)) + return 0; + + if (bo->noexec) + prot |= IOMMU_NOEXEC; + + sgt = drm_gem_shmem_get_pages_sgt(obj); + if (WARN_ON(IS_ERR(sgt))) + return PTR_ERR(sgt); + + mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT, + prot, sgt); + mapping->active = true; + + return 0; +} + +void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping) +{ + struct panfrost_gem_object *bo = mapping->obj; + struct drm_gem_object *obj = &bo->base.base; + struct panfrost_device *pfdev = to_panfrost_device(obj->dev); + struct io_pgtable_ops *ops = mapping->mmu->pgtbl_ops; + u64 iova = mapping->mmnode.start << PAGE_SHIFT; + size_t len = mapping->mmnode.size << PAGE_SHIFT; + size_t unmapped_len = 0; + + if (WARN_ON(!mapping->active)) + return; + + dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx", + mapping->mmu->as, iova, len); + + while (unmapped_len < len) { + size_t unmapped_page; + size_t pgsize = get_pgsize(iova, len - unmapped_len); + + if (ops->iova_to_phys(ops, iova)) { + unmapped_page = ops->unmap(ops, iova, pgsize, NULL); + WARN_ON(unmapped_page != pgsize); + } + iova += pgsize; + unmapped_len += pgsize; + } + + panfrost_mmu_flush_range(pfdev, mapping->mmu, + mapping->mmnode.start << PAGE_SHIFT, len); + mapping->active = false; +} + +static void mmu_tlb_inv_context_s1(void *cookie) +{} + +static void mmu_tlb_sync_context(void *cookie) +{ + //struct panfrost_mmu *mmu = cookie; + // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X +} + +static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, + void *cookie) +{ + mmu_tlb_sync_context(cookie); +} + +static void mmu_tlb_flush_leaf(unsigned long iova, size_t size, size_t granule, + void *cookie) +{ + mmu_tlb_sync_context(cookie); +} + +static const struct iommu_flush_ops mmu_tlb_ops = { + .tlb_flush_all = mmu_tlb_inv_context_s1, + .tlb_flush_walk = mmu_tlb_flush_walk, + .tlb_flush_leaf = mmu_tlb_flush_leaf, +}; + +static struct panfrost_gem_mapping * +addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr) +{ + struct panfrost_gem_mapping *mapping = NULL; + struct drm_mm_node *node; + u64 offset = addr >> PAGE_SHIFT; + struct panfrost_mmu *mmu; + + spin_lock(&pfdev->as_lock); + list_for_each_entry(mmu, &pfdev->as_lru_list, list) { + if (as == mmu->as) + goto found_mmu; + } + goto out; + +found_mmu: + + spin_lock(&mmu->mm_lock); + + drm_mm_for_each_node(node, &mmu->mm) { + if (offset >= node->start && + offset < (node->start + node->size)) { + mapping = drm_mm_node_to_panfrost_mapping(node); + + kref_get(&mapping->refcount); + break; + } + } + + spin_unlock(&mmu->mm_lock); +out: + spin_unlock(&pfdev->as_lock); + return mapping; +} + +#define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE) + +static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, + u64 addr) +{ + int ret, i; + struct panfrost_gem_mapping *bomapping; + struct panfrost_gem_object *bo; + struct address_space *mapping; + pgoff_t page_offset; + struct sg_table *sgt; + struct page **pages; + + bomapping = addr_to_mapping(pfdev, as, addr); + if (!bomapping) + return -ENOENT; + + bo = bomapping->obj; + if (!bo->is_heap) { + dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)", + bomapping->mmnode.start << PAGE_SHIFT); + ret = -EINVAL; + goto err_bo; + } + WARN_ON(bomapping->mmu->as != as); + + /* Assume 2MB alignment and size multiple */ + addr &= ~((u64)SZ_2M - 1); + page_offset = addr >> PAGE_SHIFT; + page_offset -= bomapping->mmnode.start; + + mutex_lock(&bo->base.pages_lock); + + if (!bo->base.pages) { + bo->sgts = kvmalloc_array(bo->base.base.size / SZ_2M, + sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO); + if (!bo->sgts) { + mutex_unlock(&bo->base.pages_lock); + ret = -ENOMEM; + goto err_bo; + } + + pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT, + sizeof(struct page *), GFP_KERNEL | __GFP_ZERO); + if (!pages) { + kvfree(bo->sgts); + bo->sgts = NULL; + mutex_unlock(&bo->base.pages_lock); + ret = -ENOMEM; + goto err_bo; + } + bo->base.pages = pages; + bo->base.pages_use_count = 1; + } else { + pages = bo->base.pages; + if (pages[page_offset]) { + /* Pages are already mapped, bail out. */ + mutex_unlock(&bo->base.pages_lock); + goto out; + } + } + + mapping = bo->base.base.filp->f_mapping; + mapping_set_unevictable(mapping); + + for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) { + pages[i] = shmem_read_mapping_page(mapping, i); + if (IS_ERR(pages[i])) { + mutex_unlock(&bo->base.pages_lock); + ret = PTR_ERR(pages[i]); + pages[i] = NULL; + goto err_pages; + } + } + + mutex_unlock(&bo->base.pages_lock); + + sgt = &bo->sgts[page_offset / (SZ_2M / PAGE_SIZE)]; + ret = sg_alloc_table_from_pages(sgt, pages + page_offset, + NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL); + if (ret) + goto err_pages; + + ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0); + if (ret) + goto err_map; + + mmu_map_sg(pfdev, bomapping->mmu, addr, + IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt); + + bomapping->active = true; + + dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr); + +out: + panfrost_gem_mapping_put(bomapping); + + return 0; + +err_map: + sg_free_table(sgt); +err_pages: + drm_gem_shmem_put_pages(&bo->base); +err_bo: + panfrost_gem_mapping_put(bomapping); + return ret; +} + +static void panfrost_mmu_release_ctx(struct kref *kref) +{ + struct panfrost_mmu *mmu = container_of(kref, struct panfrost_mmu, + refcount); + struct panfrost_device *pfdev = mmu->pfdev; + + spin_lock(&pfdev->as_lock); + if (mmu->as >= 0) { + pm_runtime_get_noresume(pfdev->dev); + if (pm_runtime_active(pfdev->dev)) + panfrost_mmu_disable(pfdev, mmu->as); + pm_runtime_put_autosuspend(pfdev->dev); + + clear_bit(mmu->as, &pfdev->as_alloc_mask); + clear_bit(mmu->as, &pfdev->as_in_use_mask); + list_del(&mmu->list); + } + spin_unlock(&pfdev->as_lock); + + free_io_pgtable_ops(mmu->pgtbl_ops); + drm_mm_takedown(&mmu->mm); + kfree(mmu); +} + +void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu) +{ + kref_put(&mmu->refcount, panfrost_mmu_release_ctx); +} + +struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu) +{ + kref_get(&mmu->refcount); + + return mmu; +} + +#define PFN_4G (SZ_4G >> PAGE_SHIFT) +#define PFN_4G_MASK (PFN_4G - 1) +#define PFN_16M (SZ_16M >> PAGE_SHIFT) + +static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, u64 *end) +{ + /* Executable buffers can't start or end on a 4GB boundary */ + if (!(color & PANFROST_BO_NOEXEC)) { + u64 next_seg; + + if ((*start & PFN_4G_MASK) == 0) + (*start)++; + + if ((*end & PFN_4G_MASK) == 0) + (*end)--; + + next_seg = ALIGN(*start, PFN_4G); + if (next_seg - *start <= PFN_16M) + *start = next_seg + 1; + + *end = min(*end, ALIGN(*start, PFN_4G) - 1); + } +} + +struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev) +{ + struct panfrost_mmu *mmu; + + mmu = kzalloc(sizeof(*mmu), GFP_KERNEL); + if (!mmu) + return ERR_PTR(-ENOMEM); + + mmu->pfdev = pfdev; + spin_lock_init(&mmu->mm_lock); + + /* 4G enough for now. can be 48-bit */ + drm_mm_init(&mmu->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); + mmu->mm.color_adjust = panfrost_drm_mm_color_adjust; + + INIT_LIST_HEAD(&mmu->list); + mmu->as = -1; + + mmu->pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = SZ_4K | SZ_2M, + .ias = FIELD_GET(0xff, pfdev->features.mmu_features), + .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), + .coherent_walk = pfdev->coherent, + .tlb = &mmu_tlb_ops, + .iommu_dev = pfdev->dev, + }; + + mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg, + mmu); + if (!mmu->pgtbl_ops) { + kfree(mmu); + return ERR_PTR(-EINVAL); + } + + kref_init(&mmu->refcount); + + return mmu; +} + +static const char *access_type_name(struct panfrost_device *pfdev, + u32 fault_status) +{ + switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_AARCH64_MMU)) + return "ATOMIC"; + else + return "UNKNOWN"; + case AS_FAULTSTATUS_ACCESS_TYPE_READ: + return "READ"; + case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: + return "WRITE"; + case AS_FAULTSTATUS_ACCESS_TYPE_EX: + return "EXECUTE"; + default: + WARN_ON(1); + return NULL; + } +} + +static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data) +{ + struct panfrost_device *pfdev = data; + + if (!mmu_read(pfdev, MMU_INT_STAT)) + return IRQ_NONE; + + mmu_write(pfdev, MMU_INT_MASK, 0); + return IRQ_WAKE_THREAD; +} + +static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data) +{ + struct panfrost_device *pfdev = data; + u32 status = mmu_read(pfdev, MMU_INT_RAWSTAT); + int i, ret; + + for (i = 0; status; i++) { + u32 mask = BIT(i) | BIT(i + 16); + u64 addr; + u32 fault_status; + u32 exception_type; + u32 access_type; + u32 source_id; + + if (!(status & mask)) + continue; + + fault_status = mmu_read(pfdev, AS_FAULTSTATUS(i)); + addr = mmu_read(pfdev, AS_FAULTADDRESS_LO(i)); + addr |= (u64)mmu_read(pfdev, AS_FAULTADDRESS_HI(i)) << 32; + + /* decode the fault status */ + exception_type = fault_status & 0xFF; + access_type = (fault_status >> 8) & 0x3; + source_id = (fault_status >> 16); + + mmu_write(pfdev, MMU_INT_CLEAR, mask); + + /* Page fault only */ + ret = -1; + if ((status & mask) == BIT(i) && (exception_type & 0xF8) == 0xC0) + ret = panfrost_mmu_map_fault_addr(pfdev, i, addr); + + if (ret) + /* terminal fault, print info about the fault */ + dev_err(pfdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "decoded fault status: %s\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n", + i, addr, + "TODO", + fault_status, + (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), + exception_type, panfrost_exception_name(pfdev, exception_type), + access_type, access_type_name(pfdev, fault_status), + source_id); + + status &= ~mask; + } + + mmu_write(pfdev, MMU_INT_MASK, ~0); + return IRQ_HANDLED; +}; + +int panfrost_mmu_init(struct panfrost_device *pfdev) +{ + int err, irq; + + irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu"); + if (irq <= 0) + return -ENODEV; + + err = devm_request_threaded_irq(pfdev->dev, irq, + panfrost_mmu_irq_handler, + panfrost_mmu_irq_handler_thread, + IRQF_SHARED, KBUILD_MODNAME "-mmu", + pfdev); + + if (err) { + dev_err(pfdev->dev, "failed to request mmu irq"); + return err; + } + + return 0; +} + +void panfrost_mmu_fini(struct panfrost_device *pfdev) +{ + mmu_write(pfdev, MMU_INT_MASK, 0); +} diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h new file mode 100644 index 000000000..cc2a0d307 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ + +#ifndef __PANFROST_MMU_H__ +#define __PANFROST_MMU_H__ + +struct panfrost_gem_mapping; +struct panfrost_file_priv; +struct panfrost_mmu; + +int panfrost_mmu_map(struct panfrost_gem_mapping *mapping); +void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping); + +int panfrost_mmu_init(struct panfrost_device *pfdev); +void panfrost_mmu_fini(struct panfrost_device *pfdev); +void panfrost_mmu_reset(struct panfrost_device *pfdev); + +u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); +void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); + +struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu); +void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu); +struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c new file mode 100644 index 000000000..fdbc8d949 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Collabora Ltd */ + +#include <drm/drm_file.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/panfrost_drm.h> +#include <linux/completion.h> +#include <linux/iopoll.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + +#include "panfrost_device.h" +#include "panfrost_features.h" +#include "panfrost_gem.h" +#include "panfrost_issues.h" +#include "panfrost_job.h" +#include "panfrost_mmu.h" +#include "panfrost_perfcnt.h" +#include "panfrost_regs.h" + +#define COUNTERS_PER_BLOCK 64 +#define BYTES_PER_COUNTER 4 +#define BLOCKS_PER_COREGROUP 8 +#define V4_SHADERS_PER_COREGROUP 4 + +struct panfrost_perfcnt { + struct panfrost_gem_mapping *mapping; + size_t bosize; + void *buf; + struct panfrost_file_priv *user; + struct mutex lock; + struct completion dump_comp; +}; + +void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev) +{ + complete(&pfdev->perfcnt->dump_comp); +} + +void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev) +{ + gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES); +} + +static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev) +{ + u64 gpuva; + int ret; + + reinit_completion(&pfdev->perfcnt->dump_comp); + gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT; + gpu_write(pfdev, GPU_PERFCNT_BASE_LO, gpuva); + gpu_write(pfdev, GPU_PERFCNT_BASE_HI, gpuva >> 32); + gpu_write(pfdev, GPU_INT_CLEAR, + GPU_IRQ_CLEAN_CACHES_COMPLETED | + GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); + gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE); + ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp, + msecs_to_jiffies(1000)); + if (!ret) + ret = -ETIMEDOUT; + else if (ret > 0) + ret = 0; + + return ret; +} + +static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, + struct drm_file *file_priv, + unsigned int counterset) +{ + struct panfrost_file_priv *user = file_priv->driver_priv; + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + struct drm_gem_shmem_object *bo; + u32 cfg, as; + int ret; + + if (user == perfcnt->user) + return 0; + else if (perfcnt->user) + return -EBUSY; + + ret = pm_runtime_get_sync(pfdev->dev); + if (ret < 0) + goto err_put_pm; + + bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto err_put_pm; + } + + /* Map the perfcnt buf in the address space attached to file_priv. */ + ret = panfrost_gem_open(&bo->base, file_priv); + if (ret) + goto err_put_bo; + + perfcnt->mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base), + user); + if (!perfcnt->mapping) { + ret = -EINVAL; + goto err_close_bo; + } + + perfcnt->buf = drm_gem_shmem_vmap(&bo->base); + if (IS_ERR(perfcnt->buf)) { + ret = PTR_ERR(perfcnt->buf); + goto err_put_mapping; + } + + /* + * Invalidate the cache and clear the counters to start from a fresh + * state. + */ + reinit_completion(&pfdev->perfcnt->dump_comp); + gpu_write(pfdev, GPU_INT_CLEAR, + GPU_IRQ_CLEAN_CACHES_COMPLETED | + GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); + gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR); + gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES); + ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp, + msecs_to_jiffies(1000)); + if (!ret) { + ret = -ETIMEDOUT; + goto err_vunmap; + } + + perfcnt->user = user; + + as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu); + cfg = GPU_PERFCNT_CFG_AS(as) | + GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL); + + /* + * Bifrost GPUs have 2 set of counters, but we're only interested by + * the first one for now. + */ + if (panfrost_model_is_bifrost(pfdev)) + cfg |= GPU_PERFCNT_CFG_SETSEL(counterset); + + gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff); + gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff); + gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff); + + /* + * Due to PRLAM-8186 we need to disable the Tiler before we enable HW + * counters. + */ + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186)) + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); + else + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); + + gpu_write(pfdev, GPU_PERFCNT_CFG, cfg); + + if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186)) + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); + + /* The BO ref is retained by the mapping. */ + drm_gem_object_put(&bo->base); + + return 0; + +err_vunmap: + drm_gem_shmem_vunmap(&bo->base, perfcnt->buf); +err_put_mapping: + panfrost_gem_mapping_put(perfcnt->mapping); +err_close_bo: + panfrost_gem_close(&bo->base, file_priv); +err_put_bo: + drm_gem_object_put(&bo->base); +err_put_pm: + pm_runtime_put(pfdev->dev); + return ret; +} + +static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, + struct drm_file *file_priv) +{ + struct panfrost_file_priv *user = file_priv->driver_priv; + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + + if (user != perfcnt->user) + return -EINVAL; + + gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0); + gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0); + gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0); + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); + gpu_write(pfdev, GPU_PERFCNT_CFG, + GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); + + perfcnt->user = NULL; + drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base.base, perfcnt->buf); + perfcnt->buf = NULL; + panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv); + panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu); + panfrost_gem_mapping_put(perfcnt->mapping); + perfcnt->mapping = NULL; + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); + + return 0; +} + +int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + struct drm_panfrost_perfcnt_enable *req = data; + int ret; + + ret = panfrost_unstable_ioctl_check(); + if (ret) + return ret; + + /* Only Bifrost GPUs have 2 set of counters. */ + if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0)) + return -EINVAL; + + mutex_lock(&perfcnt->lock); + if (req->enable) + ret = panfrost_perfcnt_enable_locked(pfdev, file_priv, + req->counterset); + else + ret = panfrost_perfcnt_disable_locked(pfdev, file_priv); + mutex_unlock(&perfcnt->lock); + + return ret; +} + +int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + struct drm_panfrost_perfcnt_dump *req = data; + void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr; + int ret; + + ret = panfrost_unstable_ioctl_check(); + if (ret) + return ret; + + mutex_lock(&perfcnt->lock); + if (perfcnt->user != file_priv->driver_priv) { + ret = -EINVAL; + goto out; + } + + ret = panfrost_perfcnt_dump_locked(pfdev); + if (ret) + goto out; + + if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize)) + ret = -EFAULT; + +out: + mutex_unlock(&perfcnt->lock); + + return ret; +} + +void panfrost_perfcnt_close(struct drm_file *file_priv) +{ + struct panfrost_file_priv *pfile = file_priv->driver_priv; + struct panfrost_device *pfdev = pfile->pfdev; + struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + + pm_runtime_get_sync(pfdev->dev); + mutex_lock(&perfcnt->lock); + if (perfcnt->user == pfile) + panfrost_perfcnt_disable_locked(pfdev, file_priv); + mutex_unlock(&perfcnt->lock); + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); +} + +int panfrost_perfcnt_init(struct panfrost_device *pfdev) +{ + struct panfrost_perfcnt *perfcnt; + size_t size; + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) { + unsigned int ncoregroups; + + ncoregroups = hweight64(pfdev->features.l2_present); + size = ncoregroups * BLOCKS_PER_COREGROUP * + COUNTERS_PER_BLOCK * BYTES_PER_COUNTER; + } else { + unsigned int nl2c, ncores; + + /* + * TODO: define a macro to extract the number of l2 caches from + * mem_features. + */ + nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1; + + /* + * shader_present might be sparse, but the counters layout + * forces to dump unused regions too, hence the fls64() call + * instead of hweight64(). + */ + ncores = fls64(pfdev->features.shader_present); + + /* + * There's always one JM and one Tiler block, hence the '+ 2' + * here. + */ + size = (nl2c + ncores + 2) * + COUNTERS_PER_BLOCK * BYTES_PER_COUNTER; + } + + perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL); + if (!perfcnt) + return -ENOMEM; + + perfcnt->bosize = size; + + /* Start with everything disabled. */ + gpu_write(pfdev, GPU_PERFCNT_CFG, + GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); + gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); + + init_completion(&perfcnt->dump_comp); + mutex_init(&perfcnt->lock); + pfdev->perfcnt = perfcnt; + + return 0; +} + +void panfrost_perfcnt_fini(struct panfrost_device *pfdev) +{ + /* Disable everything before leaving. */ + gpu_write(pfdev, GPU_PERFCNT_CFG, + GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); + gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0); + gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); +} diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h new file mode 100644 index 000000000..8bbcf5f5f --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2019 Collabora Ltd */ +#ifndef __PANFROST_PERFCNT_H__ +#define __PANFROST_PERFCNT_H__ + +#include "panfrost_device.h" + +void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev); +void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev); +int panfrost_perfcnt_init(struct panfrost_device *pfdev); +void panfrost_perfcnt_fini(struct panfrost_device *pfdev); +void panfrost_perfcnt_close(struct drm_file *file_priv); +int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h new file mode 100644 index 000000000..2ae3a4d30 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -0,0 +1,326 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */ +/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ +/* + * Register definitions based on mali_midg_regmap.h + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + */ +#ifndef __PANFROST_REGS_H__ +#define __PANFROST_REGS_H__ + +#define GPU_ID 0x00 +#define GPU_L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define GPU_CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define GPU_TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define GPU_MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define GROUPS_L2_COHERENT BIT(0) /* Cores groups are l2 coherent */ + +#define GPU_MMU_FEATURES 0x014 /* (RO) MMU features */ +#define GPU_AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_JS_PRESENT 0x01C /* (RO) Job slots present */ + +#define GPU_INT_RAWSTAT 0x20 +#define GPU_INT_CLEAR 0x24 +#define GPU_INT_MASK 0x28 +#define GPU_INT_STAT 0x2c +#define GPU_IRQ_FAULT BIT(0) +#define GPU_IRQ_MULTIPLE_FAULT BIT(7) +#define GPU_IRQ_RESET_COMPLETED BIT(8) +#define GPU_IRQ_POWER_CHANGED BIT(9) +#define GPU_IRQ_POWER_CHANGED_ALL BIT(10) +#define GPU_IRQ_PERFCNT_SAMPLE_COMPLETED BIT(16) +#define GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17) +#define GPU_IRQ_MASK_ALL \ + (GPU_IRQ_FAULT |\ + GPU_IRQ_MULTIPLE_FAULT |\ + GPU_IRQ_RESET_COMPLETED |\ + GPU_IRQ_POWER_CHANGED |\ + GPU_IRQ_POWER_CHANGED_ALL |\ + GPU_IRQ_PERFCNT_SAMPLE_COMPLETED |\ + GPU_IRQ_CLEAN_CACHES_COMPLETED) +#define GPU_IRQ_MASK_ERROR \ + ( \ + GPU_IRQ_FAULT |\ + GPU_IRQ_MULTIPLE_FAULT) +#define GPU_CMD 0x30 +#define GPU_CMD_SOFT_RESET 0x01 +#define GPU_CMD_PERFCNT_CLEAR 0x03 +#define GPU_CMD_PERFCNT_SAMPLE 0x04 +#define GPU_CMD_CLEAN_CACHES 0x07 +#define GPU_CMD_CLEAN_INV_CACHES 0x08 +#define GPU_STATUS 0x34 +#define GPU_STATUS_PRFCNT_ACTIVE BIT(2) +#define GPU_LATEST_FLUSH_ID 0x38 +#define GPU_PWR_KEY 0x50 /* (WO) Power manager key register */ +#define GPU_PWR_KEY_UNLOCK 0x2968A819 +#define GPU_PWR_OVERRIDE0 0x54 /* (RW) Power manager override settings */ +#define GPU_PWR_OVERRIDE1 0x58 /* (RW) Power manager override settings */ +#define GPU_FAULT_STATUS 0x3C +#define GPU_FAULT_ADDRESS_LO 0x40 +#define GPU_FAULT_ADDRESS_HI 0x44 + +#define GPU_PERFCNT_BASE_LO 0x60 +#define GPU_PERFCNT_BASE_HI 0x64 +#define GPU_PERFCNT_CFG 0x68 +#define GPU_PERFCNT_CFG_MODE(x) (x) +#define GPU_PERFCNT_CFG_MODE_OFF 0 +#define GPU_PERFCNT_CFG_MODE_MANUAL 1 +#define GPU_PERFCNT_CFG_MODE_TILE 2 +#define GPU_PERFCNT_CFG_AS(x) ((x) << 4) +#define GPU_PERFCNT_CFG_SETSEL(x) ((x) << 8) +#define GPU_PRFCNT_JM_EN 0x6c +#define GPU_PRFCNT_SHADER_EN 0x70 +#define GPU_PRFCNT_TILER_EN 0x74 +#define GPU_PRFCNT_MMU_L2_EN 0x7c + +#define GPU_THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define GPU_THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define GPU_THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define GPU_THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define GPU_THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that + * TLS must be allocated for */ + +#define GPU_TEXTURE_FEATURES(n) (0x0B0 + ((n) * 4)) +#define GPU_JS_FEATURES(n) (0x0C0 + ((n) * 4)) + +#define GPU_SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define GPU_SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ +#define GPU_TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define GPU_TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define GPU_L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define GPU_L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define GPU_COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ACE_LITE BIT(0) +#define COHERENCY_ACE BIT(1) + +#define GPU_STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ +#define GPU_STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ +#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ +#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ +#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ +#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define GPU_JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ +#define GPU_SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ +#define GPU_TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */ +#define GPU_L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */ + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT 23 +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT 24 +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) + +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT 26 +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) + +#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT 12 +#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) + +#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT 15 +#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) + +/* SHADER_CONFIG register */ +#define SC_ALT_COUNTERS BIT(3) +#define SC_OVERRIDE_FWD_PIXEL_KILL BIT(4) +#define SC_SDC_DISABLE_OQ_DISCARD BIT(6) +#define SC_LS_ALLOW_ATTR_TYPES BIT(16) +#define SC_LS_PAUSEBUFFER_DISABLE BIT(16) +#define SC_TLS_HASH_ENABLE BIT(17) +#define SC_LS_ATTR_CHECK_DISABLE BIT(18) +#define SC_ENABLE_TEXGRD_FLAGS BIT(25) +/* End SHADER_CONFIG register */ + +/* TILER_CONFIG register */ +#define TC_CLOCK_GATE_OVERRIDE BIT(0) + +/* JM_CONFIG register */ +#define JM_TIMESTAMP_OVERRIDE BIT(0) +#define JM_CLOCK_GATE_OVERRIDE BIT(1) +#define JM_JOB_THROTTLE_ENABLE BIT(2) +#define JM_JOB_THROTTLE_LIMIT_SHIFT 3 +#define JM_MAX_JOB_THROTTLE_LIMIT 0x3F +#define JM_FORCE_COHERENCY_FEATURES_SHIFT 2 +#define JM_IDVS_GROUP_SIZE_SHIFT 16 +#define JM_MAX_IDVS_GROUP_SIZE 0x3F + + +/* Job Control regs */ +#define JOB_INT_RAWSTAT 0x1000 +#define JOB_INT_CLEAR 0x1004 +#define JOB_INT_MASK 0x1008 +#define JOB_INT_STAT 0x100c +#define JOB_INT_JS_STATE 0x1010 +#define JOB_INT_THROTTLE 0x1014 + +#define MK_JS_MASK(j) (0x10001 << (j)) +#define JOB_INT_MASK_ERR(j) BIT((j) + 16) +#define JOB_INT_MASK_DONE(j) BIT(j) + +#define JS_BASE 0x1800 +#define JS_HEAD_LO(n) (JS_BASE + ((n) * 0x80) + 0x00) +#define JS_HEAD_HI(n) (JS_BASE + ((n) * 0x80) + 0x04) +#define JS_TAIL_LO(n) (JS_BASE + ((n) * 0x80) + 0x08) +#define JS_TAIL_HI(n) (JS_BASE + ((n) * 0x80) + 0x0c) +#define JS_AFFINITY_LO(n) (JS_BASE + ((n) * 0x80) + 0x10) +#define JS_AFFINITY_HI(n) (JS_BASE + ((n) * 0x80) + 0x14) +#define JS_CONFIG(n) (JS_BASE + ((n) * 0x80) + 0x18) +#define JS_XAFFINITY(n) (JS_BASE + ((n) * 0x80) + 0x1c) +#define JS_COMMAND(n) (JS_BASE + ((n) * 0x80) + 0x20) +#define JS_STATUS(n) (JS_BASE + ((n) * 0x80) + 0x24) +#define JS_HEAD_NEXT_LO(n) (JS_BASE + ((n) * 0x80) + 0x40) +#define JS_HEAD_NEXT_HI(n) (JS_BASE + ((n) * 0x80) + 0x44) +#define JS_AFFINITY_NEXT_LO(n) (JS_BASE + ((n) * 0x80) + 0x50) +#define JS_AFFINITY_NEXT_HI(n) (JS_BASE + ((n) * 0x80) + 0x54) +#define JS_CONFIG_NEXT(n) (JS_BASE + ((n) * 0x80) + 0x58) +#define JS_COMMAND_NEXT(n) (JS_BASE + ((n) * 0x80) + 0x60) +#define JS_FLUSH_ID_NEXT(n) (JS_BASE + ((n) * 0x80) + 0x70) + +/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ +#define JS_CONFIG_START_FLUSH_CLEAN BIT(8) +#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) +#define JS_CONFIG_START_MMU BIT(10) +#define JS_CONFIG_JOB_CHAIN_FLAG BIT(11) +#define JS_CONFIG_END_FLUSH_CLEAN BIT(12) +#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION BIT(14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK BIT(15) +#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + +#define JS_COMMAND_NOP 0x00 +#define JS_COMMAND_START 0x01 +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_STATUS_EVENT_ACTIVE 0x08 + + +/* MMU regs */ +#define MMU_INT_RAWSTAT 0x2000 +#define MMU_INT_CLEAR 0x2004 +#define MMU_INT_MASK 0x2008 +#define MMU_INT_STAT 0x200c + +/* AS_COMMAND register commands */ +#define AS_COMMAND_NOP 0x00 /* NOP Operation */ +#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ +#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs + (deprecated - only for use with T60x) */ +#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then + flush all L2 caches then issue a flush region command to all MMUs */ + +#define MMU_AS(as) (0x2400 + ((as) << 6)) + +#define AS_TRANSTAB_LO(as) (MMU_AS(as) + 0x00) /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x04) /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x08) /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI(as) (MMU_AS(as) + 0x0C) /* (RW) Memory attributes for address space n, high word. */ +#define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND(as) (MMU_AS(as) + 0x18) /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS(as) (MMU_AS(as) + 0x1C) /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS(as) (MMU_AS(as) + 0x28) /* (RO) Status flags for address space n */ +/* Additional Bifrost AS regsiters */ +#define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) /* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) /* (RW) Translation table configuration for address space n, high word */ +#define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) /* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C) /* (RO) Secondary fault address for address space n, high word */ + +/* + * Begin LPAE MMU TRANSTAB register values + */ +#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffffffffffff000 +#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY 0x2 +#define AS_TRANSTAB_LPAE_ADRMODE_TABLE 0x3 +#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x3 +#define AS_TRANSTAB_LPAE_READ_INNER BIT(2) +#define AS_TRANSTAB_LPAE_SHARE_OUTER BIT(4) + +#define AS_STATUS_AS_ACTIVE 0x01 + +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) + +#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) + +#define gpu_write(dev, reg, data) writel(data, dev->iomem + reg) +#define gpu_read(dev, reg) readl(dev->iomem + reg) + +#endif |