summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/panfrost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/panfrost')
-rw-r--r--drivers/gpu/drm/panfrost/Makefile2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_debugfs.c21
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_debugfs.h14
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.c23
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.h3
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.c2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.h15
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c66
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.c30
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.h7
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c29
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.c45
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.h4
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.c29
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.h5
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c5
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_regs.h5
17 files changed, 282 insertions, 23 deletions
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
index 7da2b3f02e..2c01c1e752 100644
--- a/drivers/gpu/drm/panfrost/Makefile
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -12,4 +12,6 @@ panfrost-y := \
panfrost_perfcnt.o \
panfrost_dump.o
+panfrost-$(CONFIG_DEBUG_FS) += panfrost_debugfs.o
+
obj-$(CONFIG_DRM_PANFROST) += panfrost.o
diff --git a/drivers/gpu/drm/panfrost/panfrost_debugfs.c b/drivers/gpu/drm/panfrost/panfrost_debugfs.c
new file mode 100644
index 0000000000..72d4286a6b
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_debugfs.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2023 Collabora ltd. */
+/* Copyright 2023 Amazon.com, Inc. or its affiliates. */
+
+#include <linux/debugfs.h>
+#include <linux/platform_device.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+#include <drm/panfrost_drm.h>
+
+#include "panfrost_device.h"
+#include "panfrost_gpu.h"
+#include "panfrost_debugfs.h"
+
+void panfrost_debugfs_init(struct drm_minor *minor)
+{
+ struct drm_device *dev = minor->dev;
+ struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev->dev));
+
+ debugfs_create_atomic_t("profile", 0600, minor->debugfs_root, &pfdev->profile_mode);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_debugfs.h b/drivers/gpu/drm/panfrost/panfrost_debugfs.h
new file mode 100644
index 0000000000..c5af5f3587
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2023 Collabora ltd.
+ * Copyright 2023 Amazon.com, Inc. or its affiliates.
+ */
+
+#ifndef PANFROST_DEBUGFS_H
+#define PANFROST_DEBUGFS_H
+
+#ifdef CONFIG_DEBUG_FS
+void panfrost_debugfs_init(struct drm_minor *minor);
+#endif
+
+#endif /* PANFROST_DEBUGFS_H */
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index e78de99e99..2d30da38c2 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -29,14 +29,20 @@ static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfr
static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
+ struct panfrost_device *ptdev = dev_get_drvdata(dev);
struct dev_pm_opp *opp;
+ int err;
opp = devfreq_recommended_opp(dev, freq, flags);
if (IS_ERR(opp))
return PTR_ERR(opp);
dev_pm_opp_put(opp);
- return dev_pm_opp_set_rate(dev, *freq);
+ err = dev_pm_opp_set_rate(dev, *freq);
+ if (!err)
+ ptdev->pfdevfreq.current_frequency = *freq;
+
+ return err;
}
static void panfrost_devfreq_reset(struct panfrost_devfreq *pfdevfreq)
@@ -117,6 +123,7 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev)
struct devfreq *devfreq;
struct thermal_cooling_device *cooling;
struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq;
+ unsigned long freq = ULONG_MAX;
if (pfdev->comp->num_supplies > 1) {
/*
@@ -163,6 +170,14 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev)
panfrost_devfreq_profile.initial_freq = cur_freq;
/*
+ * We could wait until panfrost_devfreq_target() to set this value, but
+ * since the simple_ondemand governor works asynchronously, there's a
+ * chance by the time someone opens the device's fdinfo file, current
+ * frequency hasn't been updated yet, so let's just do an early set.
+ */
+ pfdevfreq->current_frequency = cur_freq;
+
+ /*
* Set the recommend OPP this will enable and configure the regulator
* if any and will avoid a switch off by regulator_late_cleanup()
*/
@@ -172,6 +187,12 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev)
return ret;
}
+ /* Find the fastest defined rate */
+ opp = dev_pm_opp_find_freq_floor(dev, &freq);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+ pfdevfreq->fast_rate = freq;
+
dev_pm_opp_put(opp);
/*
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.h b/drivers/gpu/drm/panfrost/panfrost_devfreq.h
index 1514c1f9d9..48dbe185f2 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.h
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.h
@@ -19,6 +19,9 @@ struct panfrost_devfreq {
struct devfreq_simple_ondemand_data gov_data;
bool opp_of_table_added;
+ unsigned long current_frequency;
+ unsigned long fast_rate;
+
ktime_t busy_time;
ktime_t idle_time;
ktime_t time_last_update;
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index fa1a086a86..28f7046e1b 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -207,6 +207,8 @@ int panfrost_device_init(struct panfrost_device *pfdev)
spin_lock_init(&pfdev->as_lock);
+ spin_lock_init(&pfdev->cycle_counter.lock);
+
err = panfrost_clk_init(pfdev);
if (err) {
dev_err(pfdev->dev, "clk init failed %d\n", err);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index b0126b9fba..1ef38f60d5 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -107,6 +107,7 @@ struct panfrost_device {
struct list_head scheduled_jobs;
struct panfrost_perfcnt *perfcnt;
+ atomic_t profile_mode;
struct mutex sched_lock;
@@ -118,9 +119,14 @@ struct panfrost_device {
struct mutex shrinker_lock;
struct list_head shrinker_list;
- struct shrinker shrinker;
+ struct shrinker *shrinker;
struct panfrost_devfreq pfdevfreq;
+
+ struct {
+ atomic_t use_count;
+ spinlock_t lock;
+ } cycle_counter;
};
struct panfrost_mmu {
@@ -135,12 +141,19 @@ struct panfrost_mmu {
struct list_head list;
};
+struct panfrost_engine_usage {
+ unsigned long long elapsed_ns[NUM_JOB_SLOTS];
+ unsigned long long cycles[NUM_JOB_SLOTS];
+};
+
struct panfrost_file_priv {
struct panfrost_device *pfdev;
struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
struct panfrost_mmu *mmu;
+
+ struct panfrost_engine_usage engine_usage;
};
static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index a2ab99698c..7cabf4e3d1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -20,6 +20,7 @@
#include "panfrost_job.h"
#include "panfrost_gpu.h"
#include "panfrost_perfcnt.h"
+#include "panfrost_debugfs.h"
static bool unstable_ioctls;
module_param_unsafe(unstable_ioctls, bool, 0600);
@@ -267,6 +268,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
job->requirements = args->requirements;
job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
job->mmu = file_priv->mmu;
+ job->engine_usage = &file_priv->engine_usage;
slot = panfrost_job_get_slot(job);
@@ -523,7 +525,58 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW),
};
-DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
+static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
+ struct panfrost_file_priv *panfrost_priv,
+ struct drm_printer *p)
+{
+ int i;
+
+ /*
+ * IMPORTANT NOTE: drm-cycles and drm-engine measurements are not
+ * accurate, as they only provide a rough estimation of the number of
+ * GPU cycles and CPU time spent in a given context. This is due to two
+ * different factors:
+ * - Firstly, we must consider the time the CPU and then the kernel
+ * takes to process the GPU interrupt, which means additional time and
+ * GPU cycles will be added in excess to the real figure.
+ * - Secondly, the pipelining done by the Job Manager (2 job slots per
+ * engine) implies there is no way to know exactly how much time each
+ * job spent on the GPU.
+ */
+
+ static const char * const engine_names[] = {
+ "fragment", "vertex-tiler", "compute-only"
+ };
+
+ BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
+
+ for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
+ drm_printf(p, "drm-engine-%s:\t%llu ns\n",
+ engine_names[i], panfrost_priv->engine_usage.elapsed_ns[i]);
+ drm_printf(p, "drm-cycles-%s:\t%llu\n",
+ engine_names[i], panfrost_priv->engine_usage.cycles[i]);
+ drm_printf(p, "drm-maxfreq-%s:\t%lu Hz\n",
+ engine_names[i], pfdev->pfdevfreq.fast_rate);
+ drm_printf(p, "drm-curfreq-%s:\t%lu Hz\n",
+ engine_names[i], pfdev->pfdevfreq.current_frequency);
+ }
+}
+
+static void panfrost_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+ struct drm_device *dev = file->minor->dev;
+ struct panfrost_device *pfdev = dev->dev_private;
+
+ panfrost_gpu_show_fdinfo(pfdev, file->driver_priv, p);
+
+ drm_show_memory_stats(p, file);
+}
+
+static const struct file_operations panfrost_drm_driver_fops = {
+ .owner = THIS_MODULE,
+ DRM_GEM_FOPS,
+ .show_fdinfo = drm_show_fdinfo,
+};
/*
* Panfrost driver version:
@@ -535,6 +588,7 @@ static const struct drm_driver panfrost_drm_driver = {
.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
.open = panfrost_open,
.postclose = panfrost_postclose,
+ .show_fdinfo = panfrost_show_fdinfo,
.ioctls = panfrost_drm_driver_ioctls,
.num_ioctls = ARRAY_SIZE(panfrost_drm_driver_ioctls),
.fops = &panfrost_drm_driver_fops,
@@ -546,6 +600,10 @@ static const struct drm_driver panfrost_drm_driver = {
.gem_create_object = panfrost_gem_create_object,
.gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
+
+#ifdef CONFIG_DEBUG_FS
+ .debugfs_init = panfrost_debugfs_init,
+#endif
};
static int panfrost_probe(struct platform_device *pdev)
@@ -601,10 +659,14 @@ static int panfrost_probe(struct platform_device *pdev)
if (err < 0)
goto err_out1;
- panfrost_gem_shrinker_init(ddev);
+ err = panfrost_gem_shrinker_init(ddev);
+ if (err)
+ goto err_out2;
return 0;
+err_out2:
+ drm_dev_unregister(ddev);
err_out1:
pm_runtime_disable(pfdev->dev);
panfrost_device_fini(pfdev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 3c812fbd12..d47b40b82b 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -195,6 +195,34 @@ static int panfrost_gem_pin(struct drm_gem_object *obj)
return drm_gem_shmem_pin(&bo->base);
}
+static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj)
+{
+ struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+ enum drm_gem_object_status res = 0;
+
+ if (bo->base.base.import_attach || bo->base.pages)
+ res |= DRM_GEM_OBJECT_RESIDENT;
+
+ if (bo->base.madv == PANFROST_MADV_DONTNEED)
+ res |= DRM_GEM_OBJECT_PURGEABLE;
+
+ return res;
+}
+
+static size_t panfrost_gem_rss(struct drm_gem_object *obj)
+{
+ struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+
+ if (bo->is_heap) {
+ return bo->heap_rss_size;
+ } else if (bo->base.pages) {
+ WARN_ON(bo->heap_rss_size);
+ return bo->base.base.size;
+ }
+
+ return 0;
+}
+
static const struct drm_gem_object_funcs panfrost_gem_funcs = {
.free = panfrost_gem_free_object,
.open = panfrost_gem_open,
@@ -206,6 +234,8 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
.vmap = drm_gem_shmem_object_vmap,
.vunmap = drm_gem_shmem_object_vunmap,
.mmap = drm_gem_shmem_object_mmap,
+ .status = panfrost_gem_status,
+ .rss = panfrost_gem_rss,
.vm_ops = &drm_gem_shmem_vm_ops,
};
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index ad2877eeec..7516b7ecf7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -36,6 +36,11 @@ struct panfrost_gem_object {
*/
atomic_t gpu_usecount;
+ /*
+ * Object chunk size currently mapped onto physical memory
+ */
+ size_t heap_rss_size;
+
bool noexec :1;
bool is_heap :1;
};
@@ -81,7 +86,7 @@ panfrost_gem_mapping_get(struct panfrost_gem_object *bo,
void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping);
void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo);
-void panfrost_gem_shrinker_init(struct drm_device *dev);
+int panfrost_gem_shrinker_init(struct drm_device *dev);
void panfrost_gem_shrinker_cleanup(struct drm_device *dev);
#endif /* __PANFROST_GEM_H__ */
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
index 6a71a2555f..3d9f51bd48 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
@@ -18,8 +18,7 @@
static unsigned long
panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
- struct panfrost_device *pfdev =
- container_of(shrinker, struct panfrost_device, shrinker);
+ struct panfrost_device *pfdev = shrinker->private_data;
struct drm_gem_shmem_object *shmem;
unsigned long count = 0;
@@ -65,8 +64,7 @@ unlock_mappings:
static unsigned long
panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
- struct panfrost_device *pfdev =
- container_of(shrinker, struct panfrost_device, shrinker);
+ struct panfrost_device *pfdev = shrinker->private_data;
struct drm_gem_shmem_object *shmem, *tmp;
unsigned long freed = 0;
@@ -97,13 +95,21 @@ panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
*
* This function registers and sets up the panfrost shrinker.
*/
-void panfrost_gem_shrinker_init(struct drm_device *dev)
+int panfrost_gem_shrinker_init(struct drm_device *dev)
{
struct panfrost_device *pfdev = dev->dev_private;
- pfdev->shrinker.count_objects = panfrost_gem_shrinker_count;
- pfdev->shrinker.scan_objects = panfrost_gem_shrinker_scan;
- pfdev->shrinker.seeks = DEFAULT_SEEKS;
- WARN_ON(register_shrinker(&pfdev->shrinker, "drm-panfrost"));
+
+ pfdev->shrinker = shrinker_alloc(0, "drm-panfrost");
+ if (!pfdev->shrinker)
+ return -ENOMEM;
+
+ pfdev->shrinker->count_objects = panfrost_gem_shrinker_count;
+ pfdev->shrinker->scan_objects = panfrost_gem_shrinker_scan;
+ pfdev->shrinker->private_data = pfdev;
+
+ shrinker_register(pfdev->shrinker);
+
+ return 0;
}
/**
@@ -116,7 +122,6 @@ void panfrost_gem_shrinker_cleanup(struct drm_device *dev)
{
struct panfrost_device *pfdev = dev->dev_private;
- if (pfdev->shrinker.nr_deferred) {
- unregister_shrinker(&pfdev->shrinker);
- }
+ if (pfdev->shrinker)
+ shrinker_free(pfdev->shrinker);
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index eca45b83e4..311cf4525e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -78,6 +78,13 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
GPU_IRQ_PERFCNT_SAMPLE_COMPLETED |
GPU_IRQ_CLEAN_CACHES_COMPLETED);
+ /*
+ * All in-flight jobs should have released their cycle
+ * counter references upon reset, but let us make sure
+ */
+ if (drm_WARN_ON(pfdev->ddev, atomic_read(&pfdev->cycle_counter.use_count) != 0))
+ atomic_set(&pfdev->cycle_counter.use_count, 0);
+
return 0;
}
@@ -326,6 +333,40 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
pfdev->features.shader_present, pfdev->features.l2_present);
}
+void panfrost_cycle_counter_get(struct panfrost_device *pfdev)
+{
+ if (atomic_inc_not_zero(&pfdev->cycle_counter.use_count))
+ return;
+
+ spin_lock(&pfdev->cycle_counter.lock);
+ if (atomic_inc_return(&pfdev->cycle_counter.use_count) == 1)
+ gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_START);
+ spin_unlock(&pfdev->cycle_counter.lock);
+}
+
+void panfrost_cycle_counter_put(struct panfrost_device *pfdev)
+{
+ if (atomic_add_unless(&pfdev->cycle_counter.use_count, -1, 1))
+ return;
+
+ spin_lock(&pfdev->cycle_counter.lock);
+ if (atomic_dec_return(&pfdev->cycle_counter.use_count) == 0)
+ gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP);
+ spin_unlock(&pfdev->cycle_counter.lock);
+}
+
+unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev)
+{
+ u32 hi, lo;
+
+ do {
+ hi = gpu_read(pfdev, GPU_CYCLE_COUNT_HI);
+ lo = gpu_read(pfdev, GPU_CYCLE_COUNT_LO);
+ } while (hi != gpu_read(pfdev, GPU_CYCLE_COUNT_HI));
+
+ return ((u64)hi << 32) | lo;
+}
+
static u64 panfrost_get_core_mask(struct panfrost_device *pfdev)
{
u64 core_mask;
@@ -422,8 +463,8 @@ int panfrost_gpu_init(struct panfrost_device *pfdev)
dma_set_max_seg_size(pfdev->dev, UINT_MAX);
irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu");
- if (irq <= 0)
- return -ENODEV;
+ if (irq < 0)
+ return irq;
err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler,
IRQF_SHARED, KBUILD_MODNAME "-gpu", pfdev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h
index 468c51e7e4..876fdad9f7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h
@@ -16,6 +16,10 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev);
void panfrost_gpu_power_on(struct panfrost_device *pfdev);
void panfrost_gpu_power_off(struct panfrost_device *pfdev);
+void panfrost_cycle_counter_get(struct panfrost_device *pfdev);
+void panfrost_cycle_counter_put(struct panfrost_device *pfdev);
+unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev);
+
void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev);
#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index a8b4827dc4..ecd2e03514 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -159,6 +159,16 @@ panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
struct panfrost_job *job = pfdev->jobs[slot][0];
WARN_ON(!job);
+ if (job->is_profiled) {
+ if (job->engine_usage) {
+ job->engine_usage->elapsed_ns[slot] +=
+ ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
+ job->engine_usage->cycles[slot] +=
+ panfrost_cycle_counter_read(pfdev) - job->start_cycles;
+ }
+ panfrost_cycle_counter_put(job->pfdev);
+ }
+
pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
pfdev->jobs[slot][1] = NULL;
@@ -233,6 +243,13 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
subslot = panfrost_enqueue_job(pfdev, js, job);
/* Don't queue the job if a reset is in progress */
if (!atomic_read(&pfdev->reset.pending)) {
+ if (atomic_read(&pfdev->profile_mode)) {
+ panfrost_cycle_counter_get(pfdev);
+ job->is_profiled = true;
+ job->start_time = ktime_get();
+ job->start_cycles = panfrost_cycle_counter_read(pfdev);
+ }
+
job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
dev_dbg(pfdev->dev,
"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
@@ -660,10 +677,14 @@ panfrost_reset(struct panfrost_device *pfdev,
* stuck jobs. Let's make sure the PM counters stay balanced by
* manually calling pm_runtime_put_noidle() and
* panfrost_devfreq_record_idle() for each stuck job.
+ * Let's also make sure the cycle counting register's refcnt is
+ * kept balanced to prevent it from running forever
*/
spin_lock(&pfdev->js->job_lock);
for (i = 0; i < NUM_JOB_SLOTS; i++) {
for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
+ if (pfdev->jobs[i][j]->is_profiled)
+ panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
pm_runtime_put_noidle(pfdev->dev);
panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
}
@@ -810,8 +831,8 @@ int panfrost_job_init(struct panfrost_device *pfdev)
spin_lock_init(&js->job_lock);
js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
- if (js->irq <= 0)
- return -ENODEV;
+ if (js->irq < 0)
+ return js->irq;
ret = devm_request_threaded_irq(pfdev->dev, js->irq,
panfrost_job_irq_handler,
@@ -832,6 +853,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
ret = drm_sched_init(&js->queue[j].sched,
&panfrost_sched_ops,
+ DRM_SCHED_PRIORITY_COUNT,
nentries, 0,
msecs_to_jiffies(JOB_TIMEOUT_MS),
pfdev->reset.wq,
@@ -926,6 +948,9 @@ void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
}
job_write(pfdev, JS_COMMAND(i), cmd);
+
+ /* Jobs can outlive their file context */
+ job->engine_usage = NULL;
}
}
spin_unlock(&pfdev->js->job_lock);
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index 8becc1ba0e..17ff808dba 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -32,6 +32,11 @@ struct panfrost_job {
/* Fence to be signaled by drm-sched once its done with the job */
struct dma_fence *render_done_fence;
+
+ struct panfrost_engine_usage *engine_usage;
+ bool is_profiled;
+ ktime_t start_time;
+ u64 start_cycles;
};
int panfrost_job_init(struct panfrost_device *pfdev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index c0123d09f6..846dd697c4 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -522,6 +522,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
bomapping->active = true;
+ bo->heap_rss_size += SZ_2M;
dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
@@ -755,8 +756,8 @@ int panfrost_mmu_init(struct panfrost_device *pfdev)
int err, irq;
irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu");
- if (irq <= 0)
- return -ENODEV;
+ if (irq < 0)
+ return irq;
err = devm_request_threaded_irq(pfdev->dev, irq,
panfrost_mmu_irq_handler,
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
index 919f44ac85..55ec807550 100644
--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -46,6 +46,8 @@
#define GPU_CMD_SOFT_RESET 0x01
#define GPU_CMD_PERFCNT_CLEAR 0x03
#define GPU_CMD_PERFCNT_SAMPLE 0x04
+#define GPU_CMD_CYCLE_COUNT_START 0x05
+#define GPU_CMD_CYCLE_COUNT_STOP 0x06
#define GPU_CMD_CLEAN_CACHES 0x07
#define GPU_CMD_CLEAN_INV_CACHES 0x08
#define GPU_STATUS 0x34
@@ -73,6 +75,9 @@
#define GPU_PRFCNT_TILER_EN 0x74
#define GPU_PRFCNT_MMU_L2_EN 0x7c
+#define GPU_CYCLE_COUNT_LO 0x90
+#define GPU_CYCLE_COUNT_HI 0x94
+
#define GPU_THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
#define GPU_THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
#define GPU_THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */