Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/hwtracing
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
87 files changed, 41881 insertions, 0 deletions
diff --git a/drivers/hwtracing/Kconfig b/drivers/hwtracing/Kconfig
new file mode 100644
index 0000000000..911ee97710
--- /dev/null
+++ b/drivers/hwtracing/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menu "HW tracing support"
+
+source "drivers/hwtracing/stm/Kconfig"
+
+source "drivers/hwtracing/intel_th/Kconfig"
+
+source "drivers/hwtracing/ptt/Kconfig"
+
+endmenu
diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
new file mode 100644
index 0000000000..06f0a75941
--- /dev/null
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -0,0 +1,250 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Coresight configuration
+#
+menuconfig CORESIGHT
+	tristate "CoreSight Tracing Support"
+	depends on ARM || ARM64
+	depends on OF || ACPI
+	select ARM_AMBA
+	select PERF_EVENTS
+	select CONFIGFS_FS
+	help
+	  This framework provides a kernel interface for the CoreSight debug
+	  and trace drivers to register themselves with. It's intended to build
+	  a topological view of the CoreSight components based on a DT
+	  specification and configure the right series of components when a
+	  trace source gets enabled.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight.
+
+if CORESIGHT
+config CORESIGHT_LINKS_AND_SINKS
+	tristate "CoreSight Link and Sink drivers"
+	help
+	  This enables support for CoreSight link and sink drivers that are
+	  responsible for transporting and collecting the trace data
+	  respectively.  Link and sinks are dynamically aggregated with a trace
+	  entity at run time to form a complete trace path.
+
+	  To compile these drivers as modules, choose M here: the
+	  modules will be called coresight-funnel and coresight-replicator.
+
+config CORESIGHT_LINK_AND_SINK_TMC
+	tristate "Coresight generic TMC driver"
+
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Trace Memory Controller driver.
+	  Depending on its configuration the device can act as a link (embedded
+	  trace router - ETR) or sink (embedded trace FIFO).  The driver
+	  complies with the generic implementation of the component without
+	  special enhancement or added features.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight-tmc.
+
+config CORESIGHT_CATU
+	tristate "Coresight Address Translation Unit (CATU) driver"
+	depends on CORESIGHT_LINK_AND_SINK_TMC
+	help
+	   Enable support for the Coresight Address Translation Unit (CATU).
+	   CATU supports a scatter gather table of 4K pages, with forward/backward
+	   lookup. CATU helps TMC ETR to use a large physically non-contiguous trace
+	   buffer by translating the addresses used by ETR to the physical address
+	   by looking up the provided table. CATU can also be used in pass-through
+	   mode where the address is not translated.
+
+	   To compile this driver as a module, choose M here: the
+	   module will be called coresight-catu.
+
+config CORESIGHT_SINK_TPIU
+	tristate "Coresight generic TPIU driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Trace Port Interface Unit driver,
+	  responsible for bridging the gap between the on-chip coresight
+	  components and a trace for bridging the gap between the on-chip
+	  coresight components and a trace port collection engine, typically
+	  connected to an external host for use case capturing more traces than
+	  the on-board coresight memory can handle.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight-tpiu.
+
+config CORESIGHT_SINK_ETBV10
+	tristate "Coresight ETBv1.0 driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Embedded Trace Buffer version 1.0 driver
+	  that complies with the generic implementation of the component without
+	  special enhancement or added features.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight-etb10.
+
+config CORESIGHT_SOURCE_ETM3X
+	tristate "CoreSight Embedded Trace Macrocell 3.x driver"
+	depends on !ARM64
+	select CORESIGHT_LINKS_AND_SINKS
+	help
+	  This driver provides support for processor ETM3.x and PTM1.x modules,
+	  which allows tracing the instructions that a processor is executing
+	  This is primarily useful for instruction level tracing.  Depending
+	  the ETM version data tracing may also be available.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight-etm3x.
+
+config CORESIGHT_SOURCE_ETM4X
+	tristate "CoreSight ETMv4.x / ETE driver"
+	depends on ARM64
+	select CORESIGHT_LINKS_AND_SINKS
+	select PID_IN_CONTEXTIDR
+	help
+	  This driver provides support for the CoreSight Embedded Trace Macrocell
+	  version 4.x and the Embedded Trace Extensions (ETE). Both are CPU tracer
+	  modules, tracing the instructions that a processor is executing. This is
+	  primarily useful for instruction level tracing.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight-etm4x.
+
+config ETM4X_IMPDEF_FEATURE
+	bool "Control implementation defined overflow support in ETM 4.x driver"
+	depends on CORESIGHT_SOURCE_ETM4X
+	help
+	  This control provides implementation define control for CoreSight
+	  ETM 4.x tracer module that can't reduce commit rate automatically.
+	  This avoids overflow between the ETM tracer module and the cpu core.
+
+config CORESIGHT_STM
+	tristate "CoreSight System Trace Macrocell driver"
+	depends on (ARM && !(CPU_32v3 || CPU_32v4 || CPU_32v4T)) || ARM64
+	select CORESIGHT_LINKS_AND_SINKS
+	select STM
+	help
+	  This driver provides support for hardware assisted software
+	  instrumentation based tracing. This is primarily used for
+	  logging useful software events or data coming from various entities
+	  in the system, possibly running different OSs
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight-stm.
+
+config CORESIGHT_CPU_DEBUG
+	tristate "CoreSight CPU Debug driver"
+	depends on ARM || ARM64
+	depends on DEBUG_FS
+	help
+	  This driver provides support for coresight debugging module. This
+	  is primarily used to dump sample-based profiling registers when
+	  system triggers panic, the driver will parse context registers so
+	  can quickly get to know program counter (PC), secure state,
+	  exception level, etc. Before use debugging functionality, platform
+	  needs to ensure the clock domain and power domain are enabled
+	  properly, please refer Documentation/trace/coresight/coresight-cpu-debug.rst
+	  for detailed description and the example for usage.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight-cpu-debug.
+
+config CORESIGHT_CPU_DEBUG_DEFAULT_ON
+	bool "Enable CoreSight CPU Debug by default"
+	depends on CORESIGHT_CPU_DEBUG
+	help
+	  Say Y here to enable the CoreSight Debug panic-debug by default. This
+	  can also be enabled via debugfs, but this ensures the debug feature
+	  is enabled as early as possible.
+
+	  Has the same effect as setting coresight_cpu_debug.enable=1 on the
+	  kernel command line.
+
+	  Say N if unsure.
+
+config CORESIGHT_CTI
+	tristate "CoreSight Cross Trigger Interface (CTI) driver"
+	depends on ARM || ARM64
+	help
+	  This driver provides support for CoreSight CTI and CTM components.
+	  These provide hardware triggering events between CoreSight trace
+	  source and sink components. These can be used to halt trace or
+	  inject events into the trace stream. CTI also provides a software
+	  control to trigger the same halt events. This can provide fast trace
+	  halt compared to disabling sources and sinks normally in driver
+	  software.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called coresight-cti.
+
+config CORESIGHT_CTI_INTEGRATION_REGS
+	bool "Access CTI CoreSight Integration Registers"
+	depends on CORESIGHT_CTI
+	help
+	  This option adds support for the CoreSight integration registers on
+	  this device. The integration registers allow the exploration of the
+	  CTI trigger connections between this and other devices.These
+	  registers are not used in normal operation and can leave devices in
+	  an inconsistent state.
+
+config CORESIGHT_TRBE
+	tristate "Trace Buffer Extension (TRBE) driver"
+	depends on ARM64 && CORESIGHT_SOURCE_ETM4X
+	help
+	  This driver provides support for percpu Trace Buffer Extension (TRBE).
+	  TRBE always needs to be used along with its corresponding percpu ETE
+	  component. ETE generates trace data which is then captured with TRBE.
+	  Unlike traditional sink devices, TRBE is a CPU feature accessible via
+	  system registers. But its explicit dependency with trace unit (ETE)
+	  requires it to be plugged in as a coresight sink device.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called coresight-trbe.
+
+config ULTRASOC_SMB
+	tristate "Ultrasoc system memory buffer drivers"
+	depends on ACPI || COMPILE_TEST
+	depends on ARM64 && CORESIGHT_LINKS_AND_SINKS
+	help
+	  This driver provides support for the Ultrasoc system memory buffer (SMB).
+	  SMB is responsible for receiving the trace data from Coresight ETM devices
+	  and storing them to a system buffer.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called ultrasoc-smb.
+
+config CORESIGHT_TPDM
+	tristate "CoreSight Trace, Profiling & Diagnostics Monitor driver"
+	select CORESIGHT_LINKS_AND_SINKS
+	select CORESIGHT_TPDA
+	help
+	  This driver provides support for configuring monitor. Monitors are
+	  primarily responsible for data set collection and support the
+	  ability to collect any permutation of data set types.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called coresight-tpdm.
+
+config CORESIGHT_TPDA
+	tristate "CoreSight Trace, Profiling & Diagnostics Aggregator driver"
+	help
+	  This driver provides support for configuring aggregator. This is
+	  primarily useful for pulling the data sets from one or more
+	  attached monitors and pushing the resultant data out. Multiple
+	  monitors are connected on different input ports of TPDA.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called coresight-tpda.
+
+config CORESIGHT_DUMMY
+	tristate "Dummy driver support"
+	help
+	  Enables support for dummy driver. Dummy driver can be used for
+	  CoreSight sources/sinks that are owned and configured by some
+	  other subsystem and use Linux drivers to configure rest of trace
+	  path.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called coresight-dummy.
+endif
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
new file mode 100644
index 0000000000..995d3b2c76
--- /dev/null
+++ b/drivers/hwtracing/coresight/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for CoreSight drivers.
+#
+obj-$(CONFIG_CORESIGHT) += coresight.o
+coresight-y := coresight-core.o  coresight-etm-perf.o coresight-platform.o \
+		coresight-sysfs.o coresight-syscfg.o coresight-config.o \
+		coresight-cfg-preload.o coresight-cfg-afdo.o \
+		coresight-syscfg-configfs.o coresight-trace-id.o
+obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o
+coresight-tmc-y := coresight-tmc-core.o coresight-tmc-etf.o \
+		      coresight-tmc-etr.o
+obj-$(CONFIG_CORESIGHT_SINK_TPIU) += coresight-tpiu.o
+obj-$(CONFIG_CORESIGHT_SINK_ETBV10) += coresight-etb10.o
+obj-$(CONFIG_CORESIGHT_LINKS_AND_SINKS) += coresight-funnel.o \
+					   coresight-replicator.o
+obj-$(CONFIG_CORESIGHT_SOURCE_ETM3X) += coresight-etm3x.o
+coresight-etm3x-y := coresight-etm3x-core.o coresight-etm-cp14.o \
+		     coresight-etm3x-sysfs.o
+obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o
+coresight-etm4x-y := coresight-etm4x-core.o coresight-etm4x-sysfs.o \
+			coresight-etm4x-cfg.o
+obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
+obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
+obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o
+obj-$(CONFIG_CORESIGHT_CTI) += coresight-cti.o
+obj-$(CONFIG_CORESIGHT_TRBE) += coresight-trbe.o
+obj-$(CONFIG_CORESIGHT_TPDM) += coresight-tpdm.o
+obj-$(CONFIG_CORESIGHT_TPDA) += coresight-tpda.o
+coresight-cti-y := coresight-cti-core.o	coresight-cti-platform.o \
+		   coresight-cti-sysfs.o
+obj-$(CONFIG_ULTRASOC_SMB) += ultrasoc-smb.o
+obj-$(CONFIG_CORESIGHT_DUMMY) += coresight-dummy.o
diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
new file mode 100644
index 0000000000..3949ded0d4
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -0,0 +1,623 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Arm Limited. All rights reserved.
+ *
+ * Coresight Address Translation Unit support
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "coresight-catu.h"
+#include "coresight-priv.h"
+#include "coresight-tmc.h"
+
+#define csdev_to_catu_drvdata(csdev)	\
+	dev_get_drvdata(csdev->dev.parent)
+
+/* Verbose output for CATU table contents */
+#ifdef CATU_DEBUG
+#define catu_dbg(x, ...) dev_dbg(x, __VA_ARGS__)
+#else
+#define catu_dbg(x, ...) do {} while (0)
+#endif
+
+DEFINE_CORESIGHT_DEVLIST(catu_devs, "catu");
+
+struct catu_etr_buf {
+	struct tmc_sg_table *catu_table;
+	dma_addr_t sladdr;
+};
+
+/*
+ * CATU uses a page size of 4KB for page tables as well as data pages.
+ * Each 64bit entry in the table has the following format.
+ *
+ *	63			12	1  0
+ *	------------------------------------
+ *	|	 Address [63-12] | SBZ	| V|
+ *	------------------------------------
+ *
+ * Where bit[0] V indicates if the address is valid or not.
+ * Each 4K table pages have upto 256 data page pointers, taking upto 2K
+ * size. There are two Link pointers, pointing to the previous and next
+ * table pages respectively at the end of the 4K page. (i.e, entry 510
+ * and 511).
+ *  E.g, a table of two pages could look like :
+ *
+ *                 Table Page 0               Table Page 1
+ * SLADDR ===> x------------------x  x--> x-----------------x
+ * INADDR    ->|  Page 0      | V |  |    | Page 256    | V | <- INADDR+1M
+ *             |------------------|  |    |-----------------|
+ * INADDR+4K ->|  Page 1      | V |  |    |                 |
+ *             |------------------|  |    |-----------------|
+ *             |  Page 2      | V |  |    |                 |
+ *             |------------------|  |    |-----------------|
+ *             |   ...        | V |  |    |    ...          |
+ *             |------------------|  |    |-----------------|
+ * INADDR+1020K|  Page 255    | V |  |    |   Page 511  | V |
+ * SLADDR+2K==>|------------------|  |    |-----------------|
+ *             |  UNUSED      |   |  |    |                 |
+ *             |------------------|  |    |                 |
+ *             |  UNUSED      |   |  |    |                 |
+ *             |------------------|  |    |                 |
+ *             |    ...       |   |  |    |                 |
+ *             |------------------|  |    |-----------------|
+ *             |   IGNORED    | 0 |  |    | Table Page 0| 1 |
+ *             |------------------|  |    |-----------------|
+ *             |  Table Page 1| 1 |--x    | IGNORED     | 0 |
+ *             x------------------x       x-----------------x
+ * SLADDR+4K==>
+ *
+ * The base input address (used by the ETR, programmed in INADDR_{LO,HI})
+ * must be aligned to 1MB (the size addressable by a single page table).
+ * The CATU maps INADDR{LO:HI} to the first page in the table pointed
+ * to by SLADDR{LO:HI} and so on.
+ *
+ */
+typedef u64 cate_t;
+
+#define CATU_PAGE_SHIFT		12
+#define CATU_PAGE_SIZE		(1UL << CATU_PAGE_SHIFT)
+#define CATU_PAGES_PER_SYSPAGE	(PAGE_SIZE / CATU_PAGE_SIZE)
+
+/* Page pointers are only allocated in the first 2K half */
+#define CATU_PTRS_PER_PAGE	((CATU_PAGE_SIZE >> 1) / sizeof(cate_t))
+#define CATU_PTRS_PER_SYSPAGE	(CATU_PAGES_PER_SYSPAGE * CATU_PTRS_PER_PAGE)
+#define CATU_LINK_PREV		((CATU_PAGE_SIZE / sizeof(cate_t)) - 2)
+#define CATU_LINK_NEXT		((CATU_PAGE_SIZE / sizeof(cate_t)) - 1)
+
+#define CATU_ADDR_SHIFT		12
+#define CATU_ADDR_MASK		~(((cate_t)1 << CATU_ADDR_SHIFT) - 1)
+#define CATU_ENTRY_VALID	((cate_t)0x1)
+#define CATU_VALID_ENTRY(addr) \
+	(((cate_t)(addr) & CATU_ADDR_MASK) | CATU_ENTRY_VALID)
+#define CATU_ENTRY_ADDR(entry)	((cate_t)(entry) & ~((cate_t)CATU_ENTRY_VALID))
+
+/* CATU expects the INADDR to be aligned to 1M. */
+#define CATU_DEFAULT_INADDR	(1ULL << 20)
+
+/*
+ * catu_get_table : Retrieve the table pointers for the given @offset
+ * within the buffer. The buffer is wrapped around to a valid offset.
+ *
+ * Returns : The CPU virtual address for the beginning of the table
+ * containing the data page pointer for @offset. If @daddrp is not NULL,
+ * @daddrp points the DMA address of the beginning of the table.
+ */
+static inline cate_t *catu_get_table(struct tmc_sg_table *catu_table,
+				     unsigned long offset,
+				     dma_addr_t *daddrp)
+{
+	unsigned long buf_size = tmc_sg_table_buf_size(catu_table);
+	unsigned int table_nr, pg_idx, pg_offset;
+	struct tmc_pages *table_pages = &catu_table->table_pages;
+	void *ptr;
+
+	/* Make sure offset is within the range */
+	offset %= buf_size;
+
+	/*
+	 * Each table can address 1MB and a single kernel page can
+	 * contain "CATU_PAGES_PER_SYSPAGE" CATU tables.
+	 */
+	table_nr = offset >> 20;
+	/* Find the table page where the table_nr lies in */
+	pg_idx = table_nr / CATU_PAGES_PER_SYSPAGE;
+	pg_offset = (table_nr % CATU_PAGES_PER_SYSPAGE) * CATU_PAGE_SIZE;
+	if (daddrp)
+		*daddrp = table_pages->daddrs[pg_idx] + pg_offset;
+	ptr = page_address(table_pages->pages[pg_idx]);
+	return (cate_t *)((unsigned long)ptr + pg_offset);
+}
+
+#ifdef CATU_DEBUG
+static void catu_dump_table(struct tmc_sg_table *catu_table)
+{
+	int i;
+	cate_t *table;
+	unsigned long table_end, buf_size, offset = 0;
+
+	buf_size = tmc_sg_table_buf_size(catu_table);
+	dev_dbg(catu_table->dev,
+		"Dump table %p, tdaddr: %llx\n",
+		catu_table, catu_table->table_daddr);
+
+	while (offset < buf_size) {
+		table_end = offset + SZ_1M < buf_size ?
+			    offset + SZ_1M : buf_size;
+		table = catu_get_table(catu_table, offset, NULL);
+		for (i = 0; offset < table_end; i++, offset += CATU_PAGE_SIZE)
+			dev_dbg(catu_table->dev, "%d: %llx\n", i, table[i]);
+		dev_dbg(catu_table->dev, "Prev : %llx, Next: %llx\n",
+			table[CATU_LINK_PREV], table[CATU_LINK_NEXT]);
+		dev_dbg(catu_table->dev, "== End of sub-table ===");
+	}
+	dev_dbg(catu_table->dev, "== End of Table ===");
+}
+
+#else
+static inline void catu_dump_table(struct tmc_sg_table *catu_table)
+{
+}
+#endif
+
+static inline cate_t catu_make_entry(dma_addr_t addr)
+{
+	return addr ? CATU_VALID_ENTRY(addr) : 0;
+}
+
+/*
+ * catu_populate_table : Populate the given CATU table.
+ * The table is always populated as a circular table.
+ * i.e, the "prev" link of the "first" table points to the "last"
+ * table and the "next" link of the "last" table points to the
+ * "first" table. The buffer should be made linear by calling
+ * catu_set_table().
+ */
+static void
+catu_populate_table(struct tmc_sg_table *catu_table)
+{
+	int i;
+	int sys_pidx;	/* Index to current system data page */
+	int catu_pidx;	/* Index of CATU page within the system data page */
+	unsigned long offset, buf_size, table_end;
+	dma_addr_t data_daddr;
+	dma_addr_t prev_taddr, next_taddr, cur_taddr;
+	cate_t *table_ptr, *next_table;
+
+	buf_size = tmc_sg_table_buf_size(catu_table);
+	sys_pidx = catu_pidx = 0;
+	offset = 0;
+
+	table_ptr = catu_get_table(catu_table, 0, &cur_taddr);
+	prev_taddr = 0;	/* Prev link for the first table */
+
+	while (offset < buf_size) {
+		/*
+		 * The @offset is always 1M aligned here and we have an
+		 * empty table @table_ptr to fill. Each table can address
+		 * upto 1MB data buffer. The last table may have fewer
+		 * entries if the buffer size is not aligned.
+		 */
+		table_end = (offset + SZ_1M) < buf_size ?
+			    (offset + SZ_1M) : buf_size;
+		for (i = 0; offset < table_end;
+		     i++, offset += CATU_PAGE_SIZE) {
+
+			data_daddr = catu_table->data_pages.daddrs[sys_pidx] +
+				     catu_pidx * CATU_PAGE_SIZE;
+			catu_dbg(catu_table->dev,
+				"[table %5ld:%03d] 0x%llx\n",
+				(offset >> 20), i, data_daddr);
+			table_ptr[i] = catu_make_entry(data_daddr);
+			/* Move the pointers for data pages */
+			catu_pidx = (catu_pidx + 1) % CATU_PAGES_PER_SYSPAGE;
+			if (catu_pidx == 0)
+				sys_pidx++;
+		}
+
+		/*
+		 * If we have finished all the valid entries, fill the rest of
+		 * the table (i.e, last table page) with invalid entries,
+		 * to fail the lookups.
+		 */
+		if (offset == buf_size) {
+			memset(&table_ptr[i], 0,
+			       sizeof(cate_t) * (CATU_PTRS_PER_PAGE - i));
+			next_taddr = 0;
+		} else {
+			next_table = catu_get_table(catu_table,
+						    offset, &next_taddr);
+		}
+
+		table_ptr[CATU_LINK_PREV] = catu_make_entry(prev_taddr);
+		table_ptr[CATU_LINK_NEXT] = catu_make_entry(next_taddr);
+
+		catu_dbg(catu_table->dev,
+			"[table%5ld]: Cur: 0x%llx Prev: 0x%llx, Next: 0x%llx\n",
+			(offset >> 20) - 1,  cur_taddr, prev_taddr, next_taddr);
+
+		/* Update the prev/next addresses */
+		if (next_taddr) {
+			prev_taddr = cur_taddr;
+			cur_taddr = next_taddr;
+			table_ptr = next_table;
+		}
+	}
+
+	/* Sync the table for device */
+	tmc_sg_table_sync_table(catu_table);
+}
+
+static struct tmc_sg_table *
+catu_init_sg_table(struct device *catu_dev, int node,
+		   ssize_t size, void **pages)
+{
+	int nr_tpages;
+	struct tmc_sg_table *catu_table;
+
+	/*
+	 * Each table can address upto 1MB and we can have
+	 * CATU_PAGES_PER_SYSPAGE tables in a system page.
+	 */
+	nr_tpages = DIV_ROUND_UP(size, SZ_1M) / CATU_PAGES_PER_SYSPAGE;
+	catu_table = tmc_alloc_sg_table(catu_dev, node, nr_tpages,
+					size >> PAGE_SHIFT, pages);
+	if (IS_ERR(catu_table))
+		return catu_table;
+
+	catu_populate_table(catu_table);
+	dev_dbg(catu_dev,
+		"Setup table %p, size %ldKB, %d table pages\n",
+		catu_table, (unsigned long)size >> 10,  nr_tpages);
+	catu_dump_table(catu_table);
+	return catu_table;
+}
+
+static void catu_free_etr_buf(struct etr_buf *etr_buf)
+{
+	struct catu_etr_buf *catu_buf;
+
+	if (!etr_buf || etr_buf->mode != ETR_MODE_CATU || !etr_buf->private)
+		return;
+
+	catu_buf = etr_buf->private;
+	tmc_free_sg_table(catu_buf->catu_table);
+	kfree(catu_buf);
+}
+
+static ssize_t catu_get_data_etr_buf(struct etr_buf *etr_buf, u64 offset,
+				     size_t len, char **bufpp)
+{
+	struct catu_etr_buf *catu_buf = etr_buf->private;
+
+	return tmc_sg_table_get_data(catu_buf->catu_table, offset, len, bufpp);
+}
+
+static void catu_sync_etr_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp)
+{
+	struct catu_etr_buf *catu_buf = etr_buf->private;
+	struct tmc_sg_table *catu_table = catu_buf->catu_table;
+	u64 r_offset, w_offset;
+
+	/*
+	 * ETR started off at etr_buf->hwaddr. Convert the RRP/RWP to
+	 * offsets within the trace buffer.
+	 */
+	r_offset = rrp - etr_buf->hwaddr;
+	w_offset = rwp - etr_buf->hwaddr;
+
+	if (!etr_buf->full) {
+		etr_buf->len = w_offset - r_offset;
+		if (w_offset < r_offset)
+			etr_buf->len += etr_buf->size;
+	} else {
+		etr_buf->len = etr_buf->size;
+	}
+
+	etr_buf->offset = r_offset;
+	tmc_sg_table_sync_data_range(catu_table, r_offset, etr_buf->len);
+}
+
+static int catu_alloc_etr_buf(struct tmc_drvdata *tmc_drvdata,
+			      struct etr_buf *etr_buf, int node, void **pages)
+{
+	struct coresight_device *csdev;
+	struct tmc_sg_table *catu_table;
+	struct catu_etr_buf *catu_buf;
+
+	csdev = tmc_etr_get_catu_device(tmc_drvdata);
+	if (!csdev)
+		return -ENODEV;
+	catu_buf = kzalloc(sizeof(*catu_buf), GFP_KERNEL);
+	if (!catu_buf)
+		return -ENOMEM;
+
+	catu_table = catu_init_sg_table(&csdev->dev, node,
+					etr_buf->size, pages);
+	if (IS_ERR(catu_table)) {
+		kfree(catu_buf);
+		return PTR_ERR(catu_table);
+	}
+
+	etr_buf->mode = ETR_MODE_CATU;
+	etr_buf->private = catu_buf;
+	etr_buf->hwaddr = CATU_DEFAULT_INADDR;
+
+	catu_buf->catu_table = catu_table;
+	/* Get the table base address */
+	catu_buf->sladdr = catu_table->table_daddr;
+
+	return 0;
+}
+
+static const struct etr_buf_operations etr_catu_buf_ops = {
+	.alloc = catu_alloc_etr_buf,
+	.free = catu_free_etr_buf,
+	.sync = catu_sync_etr_buf,
+	.get_data = catu_get_data_etr_buf,
+};
+
+static struct attribute *catu_mgmt_attrs[] = {
+	coresight_simple_reg32(devid, CORESIGHT_DEVID),
+	coresight_simple_reg32(control, CATU_CONTROL),
+	coresight_simple_reg32(status, CATU_STATUS),
+	coresight_simple_reg32(mode, CATU_MODE),
+	coresight_simple_reg32(axictrl, CATU_AXICTRL),
+	coresight_simple_reg32(irqen, CATU_IRQEN),
+	coresight_simple_reg64(sladdr, CATU_SLADDRLO, CATU_SLADDRHI),
+	coresight_simple_reg64(inaddr, CATU_INADDRLO, CATU_INADDRHI),
+	NULL,
+};
+
+static const struct attribute_group catu_mgmt_group = {
+	.attrs = catu_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *catu_groups[] = {
+	&catu_mgmt_group,
+	NULL,
+};
+
+
+static inline int catu_wait_for_ready(struct catu_drvdata *drvdata)
+{
+	struct csdev_access *csa = &drvdata->csdev->access;
+
+	return coresight_timeout(csa, CATU_STATUS, CATU_STATUS_READY, 1);
+}
+
+static int catu_enable_hw(struct catu_drvdata *drvdata, enum cs_mode cs_mode,
+			  void *data)
+{
+	int rc;
+	u32 control, mode;
+	struct etr_buf *etr_buf = NULL;
+	struct device *dev = &drvdata->csdev->dev;
+	struct coresight_device *csdev = drvdata->csdev;
+	struct coresight_device *etrdev;
+	union coresight_dev_subtype etr_subtype = {
+		.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_SYSMEM
+	};
+
+	if (catu_wait_for_ready(drvdata))
+		dev_warn(dev, "Timeout while waiting for READY\n");
+
+	control = catu_read_control(drvdata);
+	if (control & BIT(CATU_CONTROL_ENABLE)) {
+		dev_warn(dev, "CATU is already enabled\n");
+		return -EBUSY;
+	}
+
+	rc = coresight_claim_device_unlocked(csdev);
+	if (rc)
+		return rc;
+
+	etrdev = coresight_find_input_type(
+		csdev->pdata, CORESIGHT_DEV_TYPE_SINK, etr_subtype);
+	if (etrdev) {
+		etr_buf = tmc_etr_get_buffer(etrdev, cs_mode, data);
+		if (IS_ERR(etr_buf))
+			return PTR_ERR(etr_buf);
+	}
+	control |= BIT(CATU_CONTROL_ENABLE);
+
+	if (etr_buf && etr_buf->mode == ETR_MODE_CATU) {
+		struct catu_etr_buf *catu_buf = etr_buf->private;
+
+		mode = CATU_MODE_TRANSLATE;
+		catu_write_axictrl(drvdata, CATU_OS_AXICTRL);
+		catu_write_sladdr(drvdata, catu_buf->sladdr);
+		catu_write_inaddr(drvdata, CATU_DEFAULT_INADDR);
+	} else {
+		mode = CATU_MODE_PASS_THROUGH;
+		catu_write_sladdr(drvdata, 0);
+		catu_write_inaddr(drvdata, 0);
+	}
+
+	catu_write_irqen(drvdata, 0);
+	catu_write_mode(drvdata, mode);
+	catu_write_control(drvdata, control);
+	dev_dbg(dev, "Enabled in %s mode\n",
+		(mode == CATU_MODE_PASS_THROUGH) ?
+		"Pass through" :
+		"Translate");
+	return 0;
+}
+
+static int catu_enable(struct coresight_device *csdev, enum cs_mode mode,
+		       void *data)
+{
+	int rc;
+	struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev);
+
+	CS_UNLOCK(catu_drvdata->base);
+	rc = catu_enable_hw(catu_drvdata, mode, data);
+	CS_LOCK(catu_drvdata->base);
+	return rc;
+}
+
+static int catu_disable_hw(struct catu_drvdata *drvdata)
+{
+	int rc = 0;
+	struct device *dev = &drvdata->csdev->dev;
+	struct coresight_device *csdev = drvdata->csdev;
+
+	catu_write_control(drvdata, 0);
+	coresight_disclaim_device_unlocked(csdev);
+	if (catu_wait_for_ready(drvdata)) {
+		dev_info(dev, "Timeout while waiting for READY\n");
+		rc = -EAGAIN;
+	}
+
+	dev_dbg(dev, "Disabled\n");
+	return rc;
+}
+
+static int catu_disable(struct coresight_device *csdev, void *__unused)
+{
+	int rc;
+	struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev);
+
+	CS_UNLOCK(catu_drvdata->base);
+	rc = catu_disable_hw(catu_drvdata);
+	CS_LOCK(catu_drvdata->base);
+	return rc;
+}
+
+static const struct coresight_ops_helper catu_helper_ops = {
+	.enable = catu_enable,
+	.disable = catu_disable,
+};
+
+static const struct coresight_ops catu_ops = {
+	.helper_ops = &catu_helper_ops,
+};
+
+static int catu_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret = 0;
+	u32 dma_mask;
+	struct catu_drvdata *drvdata;
+	struct coresight_desc catu_desc;
+	struct coresight_platform_data *pdata = NULL;
+	struct device *dev = &adev->dev;
+	void __iomem *base;
+
+	catu_desc.name = coresight_alloc_device_name(&catu_devs, dev);
+	if (!catu_desc.name)
+		return -ENOMEM;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	dev_set_drvdata(dev, drvdata);
+	base = devm_ioremap_resource(dev, &adev->res);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
+		goto out;
+	}
+
+	/* Setup dma mask for the device */
+	dma_mask = readl_relaxed(base + CORESIGHT_DEVID) & 0x3f;
+	switch (dma_mask) {
+	case 32:
+	case 40:
+	case 44:
+	case 48:
+	case 52:
+	case 56:
+	case 64:
+		break;
+	default:
+		/* Default to the 40bits as supported by TMC-ETR */
+		dma_mask = 40;
+	}
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_mask));
+	if (ret)
+		goto out;
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto out;
+	}
+	dev->platform_data = pdata;
+
+	drvdata->base = base;
+	catu_desc.access = CSDEV_ACCESS_IOMEM(base);
+	catu_desc.pdata = pdata;
+	catu_desc.dev = dev;
+	catu_desc.groups = catu_groups;
+	catu_desc.type = CORESIGHT_DEV_TYPE_HELPER;
+	catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU;
+	catu_desc.ops = &catu_ops;
+
+	drvdata->csdev = coresight_register(&catu_desc);
+	if (IS_ERR(drvdata->csdev))
+		ret = PTR_ERR(drvdata->csdev);
+	else
+		pm_runtime_put(&adev->dev);
+out:
+	return ret;
+}
+
+static void catu_remove(struct amba_device *adev)
+{
+	struct catu_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	coresight_unregister(drvdata->csdev);
+}
+
+static struct amba_id catu_ids[] = {
+	CS_AMBA_ID(0x000bb9ee),
+	{},
+};
+
+MODULE_DEVICE_TABLE(amba, catu_ids);
+
+static struct amba_driver catu_driver = {
+	.drv = {
+		.name			= "coresight-catu",
+		.owner			= THIS_MODULE,
+		.suppress_bind_attrs	= true,
+	},
+	.probe				= catu_probe,
+	.remove				= catu_remove,
+	.id_table			= catu_ids,
+};
+
+static int __init catu_init(void)
+{
+	int ret;
+
+	ret = amba_driver_register(&catu_driver);
+	if (ret)
+		pr_info("Error registering catu driver\n");
+	tmc_etr_set_catu_ops(&etr_catu_buf_ops);
+	return ret;
+}
+
+static void __exit catu_exit(void)
+{
+	tmc_etr_remove_catu_ops();
+	amba_driver_unregister(&catu_driver);
+}
+
+module_init(catu_init);
+module_exit(catu_exit);
+
+MODULE_AUTHOR("Suzuki K Poulose <suzuki.poulose@arm.com>");
+MODULE_DESCRIPTION("Arm CoreSight Address Translation Unit (CATU) Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h
new file mode 100644
index 0000000000..442e034bbf
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-catu.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Arm Limited. All rights reserved.
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#ifndef _CORESIGHT_CATU_H
+#define _CORESIGHT_CATU_H
+
+#include "coresight-priv.h"
+
+/* Register offset from base */
+#define CATU_CONTROL		0x000
+#define CATU_MODE		0x004
+#define CATU_AXICTRL		0x008
+#define CATU_IRQEN		0x00c
+#define CATU_SLADDRLO		0x020
+#define CATU_SLADDRHI		0x024
+#define CATU_INADDRLO		0x028
+#define CATU_INADDRHI		0x02c
+#define CATU_STATUS		0x100
+#define CATU_DEVARCH		0xfbc
+
+#define CATU_CONTROL_ENABLE	0
+
+#define CATU_MODE_PASS_THROUGH	0U
+#define CATU_MODE_TRANSLATE	1U
+
+#define CATU_AXICTRL_ARCACHE_SHIFT	4
+#define CATU_AXICTRL_ARCACHE_MASK	0xf
+#define CATU_AXICTRL_ARPROT_MASK	0x3
+#define CATU_AXICTRL_ARCACHE(arcache)		\
+	(((arcache) & CATU_AXICTRL_ARCACHE_MASK) << CATU_AXICTRL_ARCACHE_SHIFT)
+
+#define CATU_AXICTRL_VAL(arcache, arprot)	\
+	(CATU_AXICTRL_ARCACHE(arcache) | ((arprot) & CATU_AXICTRL_ARPROT_MASK))
+
+#define AXI3_AxCACHE_WB_READ_ALLOC	0x7
+/*
+ * AXI - ARPROT bits:
+ * See AMBA AXI & ACE Protocol specification (ARM IHI 0022E)
+ * sectionA4.7 Access Permissions.
+ *
+ * Bit 0: 0 - Unprivileged access, 1 - Privileged access
+ * Bit 1: 0 - Secure access, 1 - Non-secure access.
+ * Bit 2: 0 - Data access, 1 - instruction access.
+ *
+ * CATU AXICTRL:ARPROT[2] is res0 as we always access data.
+ */
+#define CATU_OS_ARPROT			0x2
+
+#define CATU_OS_AXICTRL		\
+	CATU_AXICTRL_VAL(AXI3_AxCACHE_WB_READ_ALLOC, CATU_OS_ARPROT)
+
+#define CATU_STATUS_READY	8
+#define CATU_STATUS_ADRERR	0
+#define CATU_STATUS_AXIERR	4
+
+#define CATU_IRQEN_ON		0x1
+#define CATU_IRQEN_OFF		0x0
+
+struct catu_drvdata {
+	void __iomem *base;
+	struct coresight_device *csdev;
+	int irq;
+};
+
+#define CATU_REG32(name, offset)					\
+static inline u32							\
+catu_read_##name(struct catu_drvdata *drvdata)				\
+{									\
+	return csdev_access_relaxed_read32(&drvdata->csdev->access, offset); \
+}									\
+static inline void							\
+catu_write_##name(struct catu_drvdata *drvdata, u32 val)		\
+{									\
+	csdev_access_relaxed_write32(&drvdata->csdev->access, val, offset); \
+}
+
+#define CATU_REG_PAIR(name, lo_off, hi_off)				\
+static inline u64							\
+catu_read_##name(struct catu_drvdata *drvdata)				\
+{									\
+	return csdev_access_relaxed_read_pair(&drvdata->csdev->access, lo_off, hi_off); \
+}									\
+static inline void							\
+catu_write_##name(struct catu_drvdata *drvdata, u64 val)		\
+{									\
+	csdev_access_relaxed_write_pair(&drvdata->csdev->access, val, lo_off, hi_off); \
+}
+
+CATU_REG32(control, CATU_CONTROL);
+CATU_REG32(mode, CATU_MODE);
+CATU_REG32(irqen, CATU_IRQEN);
+CATU_REG32(axictrl, CATU_AXICTRL);
+CATU_REG_PAIR(sladdr, CATU_SLADDRLO, CATU_SLADDRHI)
+CATU_REG_PAIR(inaddr, CATU_INADDRLO, CATU_INADDRHI)
+
+static inline bool coresight_is_catu_device(struct coresight_device *csdev)
+{
+	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
+		return false;
+	if (csdev->type != CORESIGHT_DEV_TYPE_HELPER)
+		return false;
+	if (csdev->subtype.helper_subtype != CORESIGHT_DEV_SUBTYPE_HELPER_CATU)
+		return false;
+	return true;
+}
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-cfg-afdo.c b/drivers/hwtracing/coresight/coresight-cfg-afdo.c
new file mode 100644
index 0000000000..84b3118425
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cfg-afdo.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include "coresight-config.h"
+
+/* ETMv4 includes and features */
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+#include "coresight-etm4x-cfg.h"
+
+/* preload configurations and features */
+
+/* preload in features for ETMv4 */
+
+/* strobe feature */
+static struct cscfg_parameter_desc strobe_params[] = {
+	{
+		.name = "window",
+		.value = 5000,
+	},
+	{
+		.name = "period",
+		.value = 10000,
+	},
+};
+
+static struct cscfg_regval_desc strobe_regs[] = {
+	/* resource selectors */
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCRSCTLRn(2),
+		.hw_info = ETM4_CFG_RES_SEL,
+		.val32 = 0x20001,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCRSCTLRn(3),
+		.hw_info = ETM4_CFG_RES_SEQ,
+		.val32 = 0x20002,
+	},
+	/* strobe window counter 0 - reload from param 0 */
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_SAVE,
+		.offset = TRCCNTVRn(0),
+		.hw_info = ETM4_CFG_RES_CTR,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_PARAM,
+		.offset = TRCCNTRLDVRn(0),
+		.hw_info = ETM4_CFG_RES_CTR,
+		.val32 = 0,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCCNTCTLRn(0),
+		.hw_info = ETM4_CFG_RES_CTR,
+		.val32 = 0x10001,
+	},
+	/* strobe period counter 1 - reload from param 1 */
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_SAVE,
+		.offset = TRCCNTVRn(1),
+		.hw_info = ETM4_CFG_RES_CTR,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE | CS_CFG_REG_TYPE_VAL_PARAM,
+		.offset = TRCCNTRLDVRn(1),
+		.hw_info = ETM4_CFG_RES_CTR,
+		.val32 = 1,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCCNTCTLRn(1),
+		.hw_info = ETM4_CFG_RES_CTR,
+		.val32 = 0x8102,
+	},
+	/* sequencer */
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCSEQEVRn(0),
+		.hw_info = ETM4_CFG_RES_SEQ,
+		.val32 = 0x0081,
+	},
+	{
+		.type = CS_CFG_REG_TYPE_RESOURCE,
+		.offset = TRCSEQEVRn(1),
+		.hw_info = ETM4_CFG_RES_SEQ,
+		.val32 = 0x0000,
+	},
+	/* view-inst */
+	{
+		.type = CS_CFG_REG_TYPE_STD | CS_CFG_REG_TYPE_VAL_MASK,
+		.offset = TRCVICTLR,
+		.val32 = 0x0003,
+		.mask32 = 0x0003,
+	},
+	/* end of regs */
+};
+
+struct cscfg_feature_desc strobe_etm4x = {
+	.name = "strobing",
+	.description = "Generate periodic trace capture windows.\n"
+		       "parameter \'window\': a number of CPU cycles (W)\n"
+		       "parameter \'period\': trace enabled for W cycles every period x W cycles\n",
+	.match_flags = CS_CFG_MATCH_CLASS_SRC_ETM4,
+	.nr_params = ARRAY_SIZE(strobe_params),
+	.params_desc = strobe_params,
+	.nr_regs = ARRAY_SIZE(strobe_regs),
+	.regs_desc = strobe_regs,
+};
+
+/* create an autofdo configuration */
+
+/* we will provide 9 sets of preset parameter values */
+#define AFDO_NR_PRESETS	9
+/* the total number of parameters in used features */
+#define AFDO_NR_PARAMS	ARRAY_SIZE(strobe_params)
+
+static const char *afdo_ref_names[] = {
+	"strobing",
+};
+
+/*
+ * set of presets leaves strobing window constant while varying period to allow
+ * experimentation with mark / space ratios for various workloads
+ */
+static u64 afdo_presets[AFDO_NR_PRESETS][AFDO_NR_PARAMS] = {
+	{ 5000, 2 },
+	{ 5000, 4 },
+	{ 5000, 8 },
+	{ 5000, 16 },
+	{ 5000, 64 },
+	{ 5000, 128 },
+	{ 5000, 512 },
+	{ 5000, 1024 },
+	{ 5000, 4096 },
+};
+
+struct cscfg_config_desc afdo_etm4x = {
+	.name = "autofdo",
+	.description = "Setup ETMs with strobing for autofdo\n"
+	"Supplied presets allow experimentation with mark-space ratio for various loads\n",
+	.nr_feat_refs = ARRAY_SIZE(afdo_ref_names),
+	.feat_ref_names = afdo_ref_names,
+	.nr_presets = AFDO_NR_PRESETS,
+	.nr_total_params = AFDO_NR_PARAMS,
+	.presets = &afdo_presets[0][0],
+};
+
+/* end of ETM4x configurations */
+#endif	/* IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X) */
diff --git a/drivers/hwtracing/coresight/coresight-cfg-preload.c b/drivers/hwtracing/coresight/coresight-cfg-preload.c
new file mode 100644
index 0000000000..e237a4edfa
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cfg-preload.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include "coresight-cfg-preload.h"
+#include "coresight-config.h"
+#include "coresight-syscfg.h"
+
+/* Basic features and configurations pre-loaded on initialisation */
+
+static struct cscfg_feature_desc *preload_feats[] = {
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+	&strobe_etm4x,
+#endif
+	NULL
+};
+
+static struct cscfg_config_desc *preload_cfgs[] = {
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+	&afdo_etm4x,
+#endif
+	NULL
+};
+
+static struct cscfg_load_owner_info preload_owner = {
+	.type = CSCFG_OWNER_PRELOAD,
+};
+
+/* preload called on initialisation */
+int cscfg_preload(void *owner_handle)
+{
+	preload_owner.owner_handle = owner_handle;
+	return cscfg_load_config_sets(preload_cfgs, preload_feats, &preload_owner);
+}
diff --git a/drivers/hwtracing/coresight/coresight-cfg-preload.h b/drivers/hwtracing/coresight/coresight-cfg-preload.h
new file mode 100644
index 0000000000..21299e1754
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cfg-preload.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+/* declare preloaded configurations and features */
+
+/* from coresight-cfg-afdo.c - etm 4x features */
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+extern struct cscfg_feature_desc strobe_etm4x;
+extern struct cscfg_config_desc afdo_etm4x;
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-config.c b/drivers/hwtracing/coresight/coresight-config.c
new file mode 100644
index 0000000000..4723bf7402
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-config.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/sysfs.h>
+#include "coresight-config.h"
+#include "coresight-priv.h"
+
+/*
+ * This provides a set of generic functions that operate on configurations
+ * and features to manage the handling of parameters, the programming and
+ * saving of registers used by features on devices.
+ */
+
+/*
+ * Write the value held in the register structure into the driver internal memory
+ * location.
+ */
+static void cscfg_set_reg(struct cscfg_regval_csdev *reg_csdev)
+{
+	u32 *p_val32 = (u32 *)reg_csdev->driver_regval;
+	u32 tmp32 = reg_csdev->reg_desc.val32;
+
+	if (reg_csdev->reg_desc.type & CS_CFG_REG_TYPE_VAL_64BIT) {
+		*((u64 *)reg_csdev->driver_regval) = reg_csdev->reg_desc.val64;
+		return;
+	}
+
+	if (reg_csdev->reg_desc.type & CS_CFG_REG_TYPE_VAL_MASK) {
+		tmp32 = *p_val32;
+		tmp32 &= ~reg_csdev->reg_desc.mask32;
+		tmp32 |= reg_csdev->reg_desc.val32 & reg_csdev->reg_desc.mask32;
+	}
+	*p_val32 = tmp32;
+}
+
+/*
+ * Read the driver value into the reg if this is marked as one we want to save.
+ */
+static void cscfg_save_reg(struct cscfg_regval_csdev *reg_csdev)
+{
+	if (!(reg_csdev->reg_desc.type & CS_CFG_REG_TYPE_VAL_SAVE))
+		return;
+	if (reg_csdev->reg_desc.type & CS_CFG_REG_TYPE_VAL_64BIT)
+		reg_csdev->reg_desc.val64 = *(u64 *)(reg_csdev->driver_regval);
+	else
+		reg_csdev->reg_desc.val32 = *(u32 *)(reg_csdev->driver_regval);
+}
+
+/*
+ * Some register values are set from parameters. Initialise these registers
+ * from the current parameter values.
+ */
+static void cscfg_init_reg_param(struct cscfg_feature_csdev *feat_csdev,
+				 struct cscfg_regval_desc *reg_desc,
+				 struct cscfg_regval_csdev *reg_csdev)
+{
+	struct cscfg_parameter_csdev *param_csdev;
+
+	/* for param, load routines have validated the index */
+	param_csdev = &feat_csdev->params_csdev[reg_desc->param_idx];
+	param_csdev->reg_csdev = reg_csdev;
+	param_csdev->val64 = reg_csdev->reg_desc.type & CS_CFG_REG_TYPE_VAL_64BIT;
+
+	if (param_csdev->val64)
+		reg_csdev->reg_desc.val64 = param_csdev->current_value;
+	else
+		reg_csdev->reg_desc.val32 = (u32)param_csdev->current_value;
+}
+
+/* set values into the driver locations referenced in cscfg_reg_csdev */
+static int cscfg_set_on_enable(struct cscfg_feature_csdev *feat_csdev)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(feat_csdev->drv_spinlock, flags);
+	for (i = 0; i < feat_csdev->nr_regs; i++)
+		cscfg_set_reg(&feat_csdev->regs_csdev[i]);
+	spin_unlock_irqrestore(feat_csdev->drv_spinlock, flags);
+	dev_dbg(&feat_csdev->csdev->dev, "Feature %s: %s",
+		feat_csdev->feat_desc->name, "set on enable");
+	return 0;
+}
+
+/* copy back values from the driver locations referenced in cscfg_reg_csdev */
+static void cscfg_save_on_disable(struct cscfg_feature_csdev *feat_csdev)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(feat_csdev->drv_spinlock, flags);
+	for (i = 0; i < feat_csdev->nr_regs; i++)
+		cscfg_save_reg(&feat_csdev->regs_csdev[i]);
+	spin_unlock_irqrestore(feat_csdev->drv_spinlock, flags);
+	dev_dbg(&feat_csdev->csdev->dev, "Feature %s: %s",
+		feat_csdev->feat_desc->name, "save on disable");
+}
+
+/* default reset - restore default values */
+void cscfg_reset_feat(struct cscfg_feature_csdev *feat_csdev)
+{
+	struct cscfg_regval_desc *reg_desc;
+	struct cscfg_regval_csdev *reg_csdev;
+	int i;
+
+	/*
+	 * set the default values for all parameters and regs from the
+	 * relevant static descriptors.
+	 */
+	for (i = 0; i < feat_csdev->nr_params; i++)
+		feat_csdev->params_csdev[i].current_value =
+			feat_csdev->feat_desc->params_desc[i].value;
+
+	for (i = 0; i < feat_csdev->nr_regs; i++) {
+		reg_desc = &feat_csdev->feat_desc->regs_desc[i];
+		reg_csdev = &feat_csdev->regs_csdev[i];
+		reg_csdev->reg_desc.type = reg_desc->type;
+
+		/* check if reg set from a parameter otherwise desc default */
+		if (reg_desc->type & CS_CFG_REG_TYPE_VAL_PARAM)
+			cscfg_init_reg_param(feat_csdev, reg_desc, reg_csdev);
+		else
+			/*
+			 * for normal values the union between val64 & val32 + mask32
+			 * allows us to init using the 64 bit value
+			 */
+			reg_csdev->reg_desc.val64 = reg_desc->val64;
+	}
+}
+
+/*
+ * For the selected presets, we set the register associated with the parameter, to
+ * the value of the preset index associated with the parameter.
+ */
+static int cscfg_update_presets(struct cscfg_config_csdev *config_csdev, int preset)
+{
+	int i, j, val_idx = 0, nr_cfg_params;
+	struct cscfg_parameter_csdev *param_csdev;
+	struct cscfg_feature_csdev *feat_csdev;
+	const struct cscfg_config_desc *config_desc = config_csdev->config_desc;
+	const char *name;
+	const u64 *preset_base;
+	u64 val;
+
+	/* preset in range 1 to nr_presets */
+	if (preset < 1 || preset > config_desc->nr_presets)
+		return -EINVAL;
+	/*
+	 * Go through the array of features, assigning preset values to
+	 * feature parameters in the order they appear.
+	 * There should be precisely the same number of preset values as the
+	 * sum of number of parameters over all the features - but we will
+	 * ensure there is no overrun.
+	 */
+	nr_cfg_params = config_desc->nr_total_params;
+	preset_base = &config_desc->presets[(preset - 1) * nr_cfg_params];
+	for (i = 0; i < config_csdev->nr_feat; i++) {
+		feat_csdev = config_csdev->feats_csdev[i];
+		if (!feat_csdev->nr_params)
+			continue;
+
+		for (j = 0; j < feat_csdev->nr_params; j++) {
+			param_csdev = &feat_csdev->params_csdev[j];
+			name = feat_csdev->feat_desc->params_desc[j].name;
+			val = preset_base[val_idx++];
+			if (param_csdev->val64) {
+				dev_dbg(&config_csdev->csdev->dev,
+					"set param %s (%lld)", name, val);
+				param_csdev->reg_csdev->reg_desc.val64 = val;
+			} else {
+				param_csdev->reg_csdev->reg_desc.val32 = (u32)val;
+				dev_dbg(&config_csdev->csdev->dev,
+					"set param %s (%d)", name, (u32)val);
+			}
+		}
+
+		/* exit early if all params filled */
+		if (val_idx >= nr_cfg_params)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * if we are not using a preset, then need to update the feature params
+ * with current values. This sets the register associated with the parameter
+ * with the current value of that parameter.
+ */
+static int cscfg_update_curr_params(struct cscfg_config_csdev *config_csdev)
+{
+	int i, j;
+	struct cscfg_feature_csdev *feat_csdev;
+	struct cscfg_parameter_csdev *param_csdev;
+	const char *name;
+	u64 val;
+
+	for (i = 0; i < config_csdev->nr_feat; i++) {
+		feat_csdev = config_csdev->feats_csdev[i];
+		if (!feat_csdev->nr_params)
+			continue;
+		for (j = 0; j < feat_csdev->nr_params; j++) {
+			param_csdev = &feat_csdev->params_csdev[j];
+			name = feat_csdev->feat_desc->params_desc[j].name;
+			val = param_csdev->current_value;
+			if (param_csdev->val64) {
+				dev_dbg(&config_csdev->csdev->dev,
+					"set param %s (%lld)", name, val);
+				param_csdev->reg_csdev->reg_desc.val64 = val;
+			} else {
+				param_csdev->reg_csdev->reg_desc.val32 = (u32)val;
+				dev_dbg(&config_csdev->csdev->dev,
+					"set param %s (%d)", name, (u32)val);
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Configuration values will be programmed into the driver locations if enabling, or read
+ * from relevant locations on disable.
+ */
+static int cscfg_prog_config(struct cscfg_config_csdev *config_csdev, bool enable)
+{
+	int i, err = 0;
+	struct cscfg_feature_csdev *feat_csdev;
+	struct coresight_device *csdev;
+
+	for (i = 0; i < config_csdev->nr_feat; i++) {
+		feat_csdev = config_csdev->feats_csdev[i];
+		csdev = feat_csdev->csdev;
+		dev_dbg(&csdev->dev, "cfg %s;  %s feature:%s", config_csdev->config_desc->name,
+			enable ? "enable" : "disable", feat_csdev->feat_desc->name);
+
+		if (enable)
+			err = cscfg_set_on_enable(feat_csdev);
+		else
+			cscfg_save_on_disable(feat_csdev);
+
+		if (err)
+			break;
+	}
+	return err;
+}
+
+/*
+ * Enable configuration for the device. Will result in the internal driver data
+ * being updated ready for programming into the device.
+ *
+ * @config_csdev:	config_csdev to set.
+ * @preset:		preset values to use - 0 for default.
+ */
+int cscfg_csdev_enable_config(struct cscfg_config_csdev *config_csdev, int preset)
+{
+	int err = 0;
+
+	if (preset)
+		err = cscfg_update_presets(config_csdev, preset);
+	else
+		err = cscfg_update_curr_params(config_csdev);
+	if (!err)
+		err = cscfg_prog_config(config_csdev, true);
+	return err;
+}
+
+void cscfg_csdev_disable_config(struct cscfg_config_csdev *config_csdev)
+{
+	cscfg_prog_config(config_csdev, false);
+}
diff --git a/drivers/hwtracing/coresight/coresight-config.h b/drivers/hwtracing/coresight/coresight-config.h
new file mode 100644
index 0000000000..6ba0139757
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-config.h
@@ -0,0 +1,262 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_CONFIG_H
+#define _CORESIGHT_CORESIGHT_CONFIG_H
+
+#include <linux/coresight.h>
+#include <linux/types.h>
+
+/* CoreSight Configuration Management - component and system wide configuration */
+
+/*
+ * Register type flags for register value descriptor:
+ * describe how the value is interpreted, and handled.
+ */
+#define CS_CFG_REG_TYPE_STD		0x80	/* reg is standard reg */
+#define CS_CFG_REG_TYPE_RESOURCE	0x40	/* reg is a resource */
+#define CS_CFG_REG_TYPE_VAL_PARAM	0x08	/* reg value uses param */
+#define CS_CFG_REG_TYPE_VAL_MASK	0x04	/* reg value bit masked */
+#define CS_CFG_REG_TYPE_VAL_64BIT	0x02	/* reg value 64 bit */
+#define CS_CFG_REG_TYPE_VAL_SAVE	0x01	/* reg value save on disable */
+
+/*
+ * flags defining what device class a feature will match to when processing a
+ * system configuration - used by config data and devices.
+ */
+#define CS_CFG_MATCH_CLASS_SRC_ALL	0x0001	/* match any source */
+#define CS_CFG_MATCH_CLASS_SRC_ETM4	0x0002	/* match any ETMv4 device */
+
+/* flags defining device instance matching - used in config match desc data. */
+#define CS_CFG_MATCH_INST_ANY		0x80000000 /* any instance of a class */
+
+/*
+ * Limit number of presets in a configuration
+ * This is related to the number of bits (4) we use to select the preset on
+ * the perf command line. Preset 0 is always none selected.
+ * See PMU_FORMAT_ATTR(preset, "config:0-3") in coresight-etm-perf.c
+ */
+#define CS_CFG_CONFIG_PRESET_MAX 15
+
+/**
+ * Parameter descriptor for a device feature.
+ *
+ * @name:  Name of parameter.
+ * @value: Initial or default value.
+ */
+struct cscfg_parameter_desc {
+	const char *name;
+	u64 value;
+};
+
+/**
+ * Representation of register value and a descriptor of register usage.
+ *
+ * Used as a descriptor in the feature descriptors.
+ * Used as a value in when in a feature loading into a csdev.
+ *
+ * Supports full 64 bit register value, or 32 bit value with optional mask
+ * value.
+ *
+ * @type:	define register usage and interpretation.
+ * @offset:	the address offset for register in the hardware device (per device specification).
+ * @hw_info:	optional hardware device type specific information. (ETM / CTI specific etc)
+ * @val64:	64 bit value.
+ * @val32:	32 bit value.
+ * @mask32:	32 bit mask when using 32 bit value to access device register - if mask type.
+ * @param_idx:	parameter index value into parameter array if param type.
+ */
+struct cscfg_regval_desc {
+	struct {
+		u32 type:8;
+		u32 offset:12;
+		u32 hw_info:12;
+	};
+	union {
+		u64 val64;
+		struct {
+			u32 val32;
+			u32 mask32;
+		};
+		u32 param_idx;
+	};
+};
+
+/**
+ * Device feature descriptor - combination of registers and parameters to
+ * program a device to implement a specific complex function.
+ *
+ * @name:	 feature name.
+ * @description: brief description of the feature.
+ * @item:	 List entry.
+ * @match_flags: matching information if loading into a device
+ * @nr_params:   number of parameters used.
+ * @params_desc: array of parameters used.
+ * @nr_regs:	 number of registers used.
+ * @regs_desc:	 array of registers used.
+ * @load_owner:	 handle to load owner for dynamic load and unload of features.
+ * @fs_group:	 reference to configfs group for dynamic unload.
+ */
+struct cscfg_feature_desc {
+	const char *name;
+	const char *description;
+	struct list_head item;
+	u32 match_flags;
+	int nr_params;
+	struct cscfg_parameter_desc *params_desc;
+	int nr_regs;
+	struct cscfg_regval_desc *regs_desc;
+	void *load_owner;
+	struct config_group *fs_group;
+};
+
+/**
+ * Configuration descriptor - describes selectable system configuration.
+ *
+ * A configuration describes device features in use, and may provide preset
+ * values for the parameters in those features.
+ *
+ * A single set of presets is the sum of the parameters declared by
+ * all the features in use - this value is @nr_total_params.
+ *
+ * @name:		name of the configuration - used for selection.
+ * @description:	description of the purpose of the configuration.
+ * @item:		list entry.
+ * @nr_feat_refs:	Number of features used in this configuration.
+ * @feat_ref_names:	references to features used in this configuration.
+ * @nr_presets:		Number of sets of presets supplied by this configuration.
+ * @nr_total_params:	Sum of all parameters declared by used features
+ * @presets:		Array of preset values.
+ * @event_ea:		Extended attribute for perf event value
+ * @active_cnt:		ref count for activate on this configuration.
+ * @load_owner:		handle to load owner for dynamic load and unload of configs.
+ * @fs_group:		reference to configfs group for dynamic unload.
+ * @available:		config can be activated - multi-stage load sets true on completion.
+ */
+struct cscfg_config_desc {
+	const char *name;
+	const char *description;
+	struct list_head item;
+	int nr_feat_refs;
+	const char **feat_ref_names;
+	int nr_presets;
+	int nr_total_params;
+	const u64 *presets; /* nr_presets * nr_total_params */
+	struct dev_ext_attribute *event_ea;
+	atomic_t active_cnt;
+	void *load_owner;
+	struct config_group *fs_group;
+	bool available;
+};
+
+/**
+ * config register instance - part of a loaded feature.
+ *                            maps register values to csdev driver structures
+ *
+ * @reg_desc:		value to use when setting feature on device / store for
+ *			readback of volatile values.
+ * @driver_regval:	pointer to internal driver element used to set the value
+ *			in hardware.
+ */
+struct cscfg_regval_csdev {
+	struct cscfg_regval_desc reg_desc;
+	void *driver_regval;
+};
+
+/**
+ * config parameter instance - part of a loaded feature.
+ *
+ * @feat_csdev:		parent feature
+ * @reg_csdev:		register value updated by this parameter.
+ * @current_value:	current value of parameter - may be set by user via
+ *			sysfs, or modified during device operation.
+ * @val64:		true if 64 bit value
+ */
+struct cscfg_parameter_csdev {
+	struct cscfg_feature_csdev *feat_csdev;
+	struct cscfg_regval_csdev *reg_csdev;
+	u64 current_value;
+	bool val64;
+};
+
+/**
+ * Feature instance loaded into a CoreSight device.
+ *
+ * When a feature is loaded into a specific device, then this structure holds
+ * the connections between the register / parameter values used and the
+ * internal data structures that are written when the feature is enabled.
+ *
+ * Since applying a feature modifies internal data structures in the device,
+ * then we have a reference to the device spinlock to protect access to these
+ * structures (@drv_spinlock).
+ *
+ * @feat_desc:		pointer to the static descriptor for this feature.
+ * @csdev:		parent CoreSight device instance.
+ * @node:		list entry into feature list for this device.
+ * @drv_spinlock:	device spinlock for access to driver register data.
+ * @nr_params:		number of parameters.
+ * @params_csdev:	current parameter values on this device
+ * @nr_regs:		number of registers to be programmed.
+ * @regs_csdev:		Programming details for the registers
+ */
+struct cscfg_feature_csdev {
+	const struct cscfg_feature_desc *feat_desc;
+	struct coresight_device *csdev;
+	struct list_head node;
+	spinlock_t *drv_spinlock;
+	int nr_params;
+	struct cscfg_parameter_csdev *params_csdev;
+	int nr_regs;
+	struct cscfg_regval_csdev *regs_csdev;
+};
+
+/**
+ * Configuration instance when loaded into a CoreSight device.
+ *
+ * The instance contains references to loaded features on this device that are
+ * used by the configuration.
+ *
+ * @config_desc:reference to the descriptor for this configuration
+ * @csdev:	parent coresight device for this configuration instance.
+ * @enabled:	true if configuration is enabled on this device.
+ * @node:	list entry within the coresight device
+ * @nr_feat:	Number of features on this device that are used in the
+ *		configuration.
+ * @feats_csdev:references to the device features to enable.
+ */
+struct cscfg_config_csdev {
+	const struct cscfg_config_desc *config_desc;
+	struct coresight_device *csdev;
+	bool enabled;
+	struct list_head node;
+	int nr_feat;
+	struct cscfg_feature_csdev *feats_csdev[];
+};
+
+/**
+ * Coresight device operations.
+ *
+ * Registered coresight devices provide these operations to manage feature
+ * instances compatible with the device hardware and drivers
+ *
+ * @load_feat:	Pass a feature descriptor into the device and create the
+ *		loaded feature instance (struct cscfg_feature_csdev).
+ */
+struct cscfg_csdev_feat_ops {
+	int (*load_feat)(struct coresight_device *csdev,
+			 struct cscfg_feature_csdev *feat_csdev);
+};
+
+/* coresight config helper functions*/
+
+/* enable / disable config on a device - called with appropriate locks set.*/
+int cscfg_csdev_enable_config(struct cscfg_config_csdev *config_csdev, int preset);
+void cscfg_csdev_disable_config(struct cscfg_config_csdev *config_csdev);
+
+/* reset a feature to default values */
+void cscfg_reset_feat(struct cscfg_feature_csdev *feat_csdev);
+
+#endif /* _CORESIGHT_CORESIGHT_CONFIG_H */
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
new file mode 100644
index 0000000000..9fabe00a40
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -0,0 +1,1839 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/build_bug.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/idr.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/stringhash.h>
+#include <linux/mutex.h>
+#include <linux/clk.h>
+#include <linux/coresight.h>
+#include <linux/property.h>
+#include <linux/delay.h>
+#include <linux/pm_runtime.h>
+
+#include "coresight-etm-perf.h"
+#include "coresight-priv.h"
+#include "coresight-syscfg.h"
+
+static DEFINE_MUTEX(coresight_mutex);
+static DEFINE_PER_CPU(struct coresight_device *, csdev_sink);
+
+/*
+ * Use IDR to map the hash of the source's device name
+ * to the pointer of path for the source. The idr is for
+ * the sources which aren't associated with CPU.
+ */
+static DEFINE_IDR(path_idr);
+
+/**
+ * struct coresight_node - elements of a path, from source to sink
+ * @csdev:	Address of an element.
+ * @link:	hook to the list.
+ */
+struct coresight_node {
+	struct coresight_device *csdev;
+	struct list_head link;
+};
+
+/*
+ * When operating Coresight drivers from the sysFS interface, only a single
+ * path can exist from a tracer (associated to a CPU) to a sink.
+ */
+static DEFINE_PER_CPU(struct list_head *, tracer_path);
+
+/*
+ * When losing synchronisation a new barrier packet needs to be inserted at the
+ * beginning of the data collected in a buffer.  That way the decoder knows that
+ * it needs to look for another sync sequence.
+ */
+const u32 coresight_barrier_pkt[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
+EXPORT_SYMBOL_GPL(coresight_barrier_pkt);
+
+static const struct cti_assoc_op *cti_assoc_ops;
+
+ssize_t coresight_simple_show_pair(struct device *_dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct coresight_device *csdev = container_of(_dev, struct coresight_device, dev);
+	struct cs_pair_attribute *cs_attr = container_of(attr, struct cs_pair_attribute, attr);
+	u64 val;
+
+	pm_runtime_get_sync(_dev->parent);
+	val = csdev_access_relaxed_read_pair(&csdev->access, cs_attr->lo_off, cs_attr->hi_off);
+	pm_runtime_put_sync(_dev->parent);
+	return sysfs_emit(buf, "0x%llx\n", val);
+}
+EXPORT_SYMBOL_GPL(coresight_simple_show_pair);
+
+ssize_t coresight_simple_show32(struct device *_dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct coresight_device *csdev = container_of(_dev, struct coresight_device, dev);
+	struct cs_off_attribute *cs_attr = container_of(attr, struct cs_off_attribute, attr);
+	u64 val;
+
+	pm_runtime_get_sync(_dev->parent);
+	val = csdev_access_relaxed_read32(&csdev->access, cs_attr->off);
+	pm_runtime_put_sync(_dev->parent);
+	return sysfs_emit(buf, "0x%llx\n", val);
+}
+EXPORT_SYMBOL_GPL(coresight_simple_show32);
+
+void coresight_set_cti_ops(const struct cti_assoc_op *cti_op)
+{
+	cti_assoc_ops = cti_op;
+}
+EXPORT_SYMBOL_GPL(coresight_set_cti_ops);
+
+void coresight_remove_cti_ops(void)
+{
+	cti_assoc_ops = NULL;
+}
+EXPORT_SYMBOL_GPL(coresight_remove_cti_ops);
+
+void coresight_set_percpu_sink(int cpu, struct coresight_device *csdev)
+{
+	per_cpu(csdev_sink, cpu) = csdev;
+}
+EXPORT_SYMBOL_GPL(coresight_set_percpu_sink);
+
+struct coresight_device *coresight_get_percpu_sink(int cpu)
+{
+	return per_cpu(csdev_sink, cpu);
+}
+EXPORT_SYMBOL_GPL(coresight_get_percpu_sink);
+
+static struct coresight_connection *
+coresight_find_out_connection(struct coresight_device *src_dev,
+			      struct coresight_device *dest_dev)
+{
+	int i;
+	struct coresight_connection *conn;
+
+	for (i = 0; i < src_dev->pdata->nr_outconns; i++) {
+		conn = src_dev->pdata->out_conns[i];
+		if (conn->dest_dev == dest_dev)
+			return conn;
+	}
+
+	dev_err(&src_dev->dev,
+		"couldn't find output connection, src_dev: %s, dest_dev: %s\n",
+		dev_name(&src_dev->dev), dev_name(&dest_dev->dev));
+
+	return ERR_PTR(-ENODEV);
+}
+
+static inline u32 coresight_read_claim_tags(struct coresight_device *csdev)
+{
+	return csdev_access_relaxed_read32(&csdev->access, CORESIGHT_CLAIMCLR);
+}
+
+static inline bool coresight_is_claimed_self_hosted(struct coresight_device *csdev)
+{
+	return coresight_read_claim_tags(csdev) == CORESIGHT_CLAIM_SELF_HOSTED;
+}
+
+static inline bool coresight_is_claimed_any(struct coresight_device *csdev)
+{
+	return coresight_read_claim_tags(csdev) != 0;
+}
+
+static inline void coresight_set_claim_tags(struct coresight_device *csdev)
+{
+	csdev_access_relaxed_write32(&csdev->access, CORESIGHT_CLAIM_SELF_HOSTED,
+				     CORESIGHT_CLAIMSET);
+	isb();
+}
+
+static inline void coresight_clear_claim_tags(struct coresight_device *csdev)
+{
+	csdev_access_relaxed_write32(&csdev->access, CORESIGHT_CLAIM_SELF_HOSTED,
+				     CORESIGHT_CLAIMCLR);
+	isb();
+}
+
+/*
+ * coresight_claim_device_unlocked : Claim the device for self-hosted usage
+ * to prevent an external tool from touching this device. As per PSCI
+ * standards, section "Preserving the execution context" => "Debug and Trace
+ * save and Restore", DBGCLAIM[1] is reserved for Self-hosted debug/trace and
+ * DBGCLAIM[0] is reserved for external tools.
+ *
+ * Called with CS_UNLOCKed for the component.
+ * Returns : 0 on success
+ */
+int coresight_claim_device_unlocked(struct coresight_device *csdev)
+{
+	if (WARN_ON(!csdev))
+		return -EINVAL;
+
+	if (coresight_is_claimed_any(csdev))
+		return -EBUSY;
+
+	coresight_set_claim_tags(csdev);
+	if (coresight_is_claimed_self_hosted(csdev))
+		return 0;
+	/* There was a race setting the tags, clean up and fail */
+	coresight_clear_claim_tags(csdev);
+	return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(coresight_claim_device_unlocked);
+
+int coresight_claim_device(struct coresight_device *csdev)
+{
+	int rc;
+
+	if (WARN_ON(!csdev))
+		return -EINVAL;
+
+	CS_UNLOCK(csdev->access.base);
+	rc = coresight_claim_device_unlocked(csdev);
+	CS_LOCK(csdev->access.base);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(coresight_claim_device);
+
+/*
+ * coresight_disclaim_device_unlocked : Clear the claim tags for the device.
+ * Called with CS_UNLOCKed for the component.
+ */
+void coresight_disclaim_device_unlocked(struct coresight_device *csdev)
+{
+
+	if (WARN_ON(!csdev))
+		return;
+
+	if (coresight_is_claimed_self_hosted(csdev))
+		coresight_clear_claim_tags(csdev);
+	else
+		/*
+		 * The external agent may have not honoured our claim
+		 * and has manipulated it. Or something else has seriously
+		 * gone wrong in our driver.
+		 */
+		WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL_GPL(coresight_disclaim_device_unlocked);
+
+void coresight_disclaim_device(struct coresight_device *csdev)
+{
+	if (WARN_ON(!csdev))
+		return;
+
+	CS_UNLOCK(csdev->access.base);
+	coresight_disclaim_device_unlocked(csdev);
+	CS_LOCK(csdev->access.base);
+}
+EXPORT_SYMBOL_GPL(coresight_disclaim_device);
+
+/*
+ * Add a helper as an output device. This function takes the @coresight_mutex
+ * because it's assumed that it's called from the helper device, outside of the
+ * core code where the mutex would already be held. Don't add new calls to this
+ * from inside the core code, instead try to add the new helper to the DT and
+ * ACPI where it will be picked up and linked automatically.
+ */
+void coresight_add_helper(struct coresight_device *csdev,
+			  struct coresight_device *helper)
+{
+	int i;
+	struct coresight_connection conn = {};
+	struct coresight_connection *new_conn;
+
+	mutex_lock(&coresight_mutex);
+	conn.dest_fwnode = fwnode_handle_get(dev_fwnode(&helper->dev));
+	conn.dest_dev = helper;
+	conn.dest_port = conn.src_port = -1;
+	conn.src_dev = csdev;
+
+	/*
+	 * Check for duplicates because this is called every time a helper
+	 * device is re-loaded. Existing connections will get re-linked
+	 * automatically.
+	 */
+	for (i = 0; i < csdev->pdata->nr_outconns; ++i)
+		if (csdev->pdata->out_conns[i]->dest_fwnode == conn.dest_fwnode)
+			goto unlock;
+
+	new_conn = coresight_add_out_conn(csdev->dev.parent, csdev->pdata,
+					  &conn);
+	if (!IS_ERR(new_conn))
+		coresight_add_in_conn(new_conn);
+
+unlock:
+	mutex_unlock(&coresight_mutex);
+}
+EXPORT_SYMBOL_GPL(coresight_add_helper);
+
+static int coresight_enable_sink(struct coresight_device *csdev,
+				 enum cs_mode mode, void *data)
+{
+	int ret;
+
+	/*
+	 * We need to make sure the "new" session is compatible with the
+	 * existing "mode" of operation.
+	 */
+	if (!sink_ops(csdev)->enable)
+		return -EINVAL;
+
+	ret = sink_ops(csdev)->enable(csdev, mode, data);
+	if (ret)
+		return ret;
+
+	csdev->enable = true;
+
+	return 0;
+}
+
+static void coresight_disable_sink(struct coresight_device *csdev)
+{
+	int ret;
+
+	if (!sink_ops(csdev)->disable)
+		return;
+
+	ret = sink_ops(csdev)->disable(csdev);
+	if (ret)
+		return;
+	csdev->enable = false;
+}
+
+static int coresight_enable_link(struct coresight_device *csdev,
+				 struct coresight_device *parent,
+				 struct coresight_device *child)
+{
+	int ret = 0;
+	int link_subtype;
+	struct coresight_connection *inconn, *outconn;
+
+	if (!parent || !child)
+		return -EINVAL;
+
+	inconn = coresight_find_out_connection(parent, csdev);
+	outconn = coresight_find_out_connection(csdev, child);
+	link_subtype = csdev->subtype.link_subtype;
+
+	if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG && IS_ERR(inconn))
+		return PTR_ERR(inconn);
+	if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT && IS_ERR(outconn))
+		return PTR_ERR(outconn);
+
+	if (link_ops(csdev)->enable) {
+		ret = link_ops(csdev)->enable(csdev, inconn, outconn);
+		if (!ret)
+			csdev->enable = true;
+	}
+
+	return ret;
+}
+
+static void coresight_disable_link(struct coresight_device *csdev,
+				   struct coresight_device *parent,
+				   struct coresight_device *child)
+{
+	int i;
+	int link_subtype;
+	struct coresight_connection *inconn, *outconn;
+
+	if (!parent || !child)
+		return;
+
+	inconn = coresight_find_out_connection(parent, csdev);
+	outconn = coresight_find_out_connection(csdev, child);
+	link_subtype = csdev->subtype.link_subtype;
+
+	if (link_ops(csdev)->disable) {
+		link_ops(csdev)->disable(csdev, inconn, outconn);
+	}
+
+	if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) {
+		for (i = 0; i < csdev->pdata->nr_inconns; i++)
+			if (atomic_read(&csdev->pdata->in_conns[i]->dest_refcnt) !=
+			    0)
+				return;
+	} else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) {
+		for (i = 0; i < csdev->pdata->nr_outconns; i++)
+			if (atomic_read(&csdev->pdata->out_conns[i]->src_refcnt) !=
+			    0)
+				return;
+	} else {
+		if (atomic_read(&csdev->refcnt) != 0)
+			return;
+	}
+
+	csdev->enable = false;
+}
+
+int coresight_enable_source(struct coresight_device *csdev, enum cs_mode mode,
+			    void *data)
+{
+	int ret;
+
+	if (!csdev->enable) {
+		if (source_ops(csdev)->enable) {
+			ret = source_ops(csdev)->enable(csdev, data, mode);
+			if (ret)
+				return ret;
+		}
+		csdev->enable = true;
+	}
+
+	atomic_inc(&csdev->refcnt);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(coresight_enable_source);
+
+static bool coresight_is_helper(struct coresight_device *csdev)
+{
+	return csdev->type == CORESIGHT_DEV_TYPE_HELPER;
+}
+
+static int coresight_enable_helper(struct coresight_device *csdev,
+				   enum cs_mode mode, void *data)
+{
+	int ret;
+
+	if (!helper_ops(csdev)->enable)
+		return 0;
+	ret = helper_ops(csdev)->enable(csdev, mode, data);
+	if (ret)
+		return ret;
+
+	csdev->enable = true;
+	return 0;
+}
+
+static void coresight_disable_helper(struct coresight_device *csdev)
+{
+	int ret;
+
+	if (!helper_ops(csdev)->disable)
+		return;
+
+	ret = helper_ops(csdev)->disable(csdev, NULL);
+	if (ret)
+		return;
+	csdev->enable = false;
+}
+
+static void coresight_disable_helpers(struct coresight_device *csdev)
+{
+	int i;
+	struct coresight_device *helper;
+
+	for (i = 0; i < csdev->pdata->nr_outconns; ++i) {
+		helper = csdev->pdata->out_conns[i]->dest_dev;
+		if (helper && coresight_is_helper(helper))
+			coresight_disable_helper(helper);
+	}
+}
+
+/**
+ *  coresight_disable_source - Drop the reference count by 1 and disable
+ *  the device if there are no users left.
+ *
+ *  @csdev: The coresight device to disable
+ *  @data: Opaque data to pass on to the disable function of the source device.
+ *         For example in perf mode this is a pointer to the struct perf_event.
+ *
+ *  Returns true if the device has been disabled.
+ */
+bool coresight_disable_source(struct coresight_device *csdev, void *data)
+{
+	if (atomic_dec_return(&csdev->refcnt) == 0) {
+		if (source_ops(csdev)->disable)
+			source_ops(csdev)->disable(csdev, data);
+		coresight_disable_helpers(csdev);
+		csdev->enable = false;
+	}
+	return !csdev->enable;
+}
+EXPORT_SYMBOL_GPL(coresight_disable_source);
+
+/*
+ * coresight_disable_path_from : Disable components in the given path beyond
+ * @nd in the list. If @nd is NULL, all the components, except the SOURCE are
+ * disabled.
+ */
+static void coresight_disable_path_from(struct list_head *path,
+					struct coresight_node *nd)
+{
+	u32 type;
+	struct coresight_device *csdev, *parent, *child;
+
+	if (!nd)
+		nd = list_first_entry(path, struct coresight_node, link);
+
+	list_for_each_entry_continue(nd, path, link) {
+		csdev = nd->csdev;
+		type = csdev->type;
+
+		/*
+		 * ETF devices are tricky... They can be a link or a sink,
+		 * depending on how they are configured.  If an ETF has been
+		 * "activated" it will be configured as a sink, otherwise
+		 * go ahead with the link configuration.
+		 */
+		if (type == CORESIGHT_DEV_TYPE_LINKSINK)
+			type = (csdev == coresight_get_sink(path)) ?
+						CORESIGHT_DEV_TYPE_SINK :
+						CORESIGHT_DEV_TYPE_LINK;
+
+		switch (type) {
+		case CORESIGHT_DEV_TYPE_SINK:
+			coresight_disable_sink(csdev);
+			break;
+		case CORESIGHT_DEV_TYPE_SOURCE:
+			/*
+			 * We skip the first node in the path assuming that it
+			 * is the source. So we don't expect a source device in
+			 * the middle of a path.
+			 */
+			WARN_ON(1);
+			break;
+		case CORESIGHT_DEV_TYPE_LINK:
+			parent = list_prev_entry(nd, link)->csdev;
+			child = list_next_entry(nd, link)->csdev;
+			coresight_disable_link(csdev, parent, child);
+			break;
+		default:
+			break;
+		}
+
+		/* Disable all helpers adjacent along the path last */
+		coresight_disable_helpers(csdev);
+	}
+}
+
+void coresight_disable_path(struct list_head *path)
+{
+	coresight_disable_path_from(path, NULL);
+}
+EXPORT_SYMBOL_GPL(coresight_disable_path);
+
+static int coresight_enable_helpers(struct coresight_device *csdev,
+				    enum cs_mode mode, void *data)
+{
+	int i, ret = 0;
+	struct coresight_device *helper;
+
+	for (i = 0; i < csdev->pdata->nr_outconns; ++i) {
+		helper = csdev->pdata->out_conns[i]->dest_dev;
+		if (!helper || !coresight_is_helper(helper))
+			continue;
+
+		ret = coresight_enable_helper(helper, mode, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int coresight_enable_path(struct list_head *path, enum cs_mode mode,
+			  void *sink_data)
+{
+	int ret = 0;
+	u32 type;
+	struct coresight_node *nd;
+	struct coresight_device *csdev, *parent, *child;
+
+	list_for_each_entry_reverse(nd, path, link) {
+		csdev = nd->csdev;
+		type = csdev->type;
+
+		/* Enable all helpers adjacent to the path first */
+		ret = coresight_enable_helpers(csdev, mode, sink_data);
+		if (ret)
+			goto err;
+		/*
+		 * ETF devices are tricky... They can be a link or a sink,
+		 * depending on how they are configured.  If an ETF has been
+		 * "activated" it will be configured as a sink, otherwise
+		 * go ahead with the link configuration.
+		 */
+		if (type == CORESIGHT_DEV_TYPE_LINKSINK)
+			type = (csdev == coresight_get_sink(path)) ?
+						CORESIGHT_DEV_TYPE_SINK :
+						CORESIGHT_DEV_TYPE_LINK;
+
+		switch (type) {
+		case CORESIGHT_DEV_TYPE_SINK:
+			ret = coresight_enable_sink(csdev, mode, sink_data);
+			/*
+			 * Sink is the first component turned on. If we
+			 * failed to enable the sink, there are no components
+			 * that need disabling. Disabling the path here
+			 * would mean we could disrupt an existing session.
+			 */
+			if (ret)
+				goto out;
+			break;
+		case CORESIGHT_DEV_TYPE_SOURCE:
+			/* sources are enabled from either sysFS or Perf */
+			break;
+		case CORESIGHT_DEV_TYPE_LINK:
+			parent = list_prev_entry(nd, link)->csdev;
+			child = list_next_entry(nd, link)->csdev;
+			ret = coresight_enable_link(csdev, parent, child);
+			if (ret)
+				goto err;
+			break;
+		default:
+			goto err;
+		}
+	}
+
+out:
+	return ret;
+err:
+	coresight_disable_path_from(path, nd);
+	goto out;
+}
+
+struct coresight_device *coresight_get_sink(struct list_head *path)
+{
+	struct coresight_device *csdev;
+
+	if (!path)
+		return NULL;
+
+	csdev = list_last_entry(path, struct coresight_node, link)->csdev;
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return NULL;
+
+	return csdev;
+}
+
+static struct coresight_device *
+coresight_find_enabled_sink(struct coresight_device *csdev)
+{
+	int i;
+	struct coresight_device *sink = NULL;
+
+	if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	     csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) &&
+	     csdev->activated)
+		return csdev;
+
+	/*
+	 * Recursively explore each port found on this element.
+	 */
+	for (i = 0; i < csdev->pdata->nr_outconns; i++) {
+		struct coresight_device *child_dev;
+
+		child_dev = csdev->pdata->out_conns[i]->dest_dev;
+		if (child_dev)
+			sink = coresight_find_enabled_sink(child_dev);
+		if (sink)
+			return sink;
+	}
+
+	return NULL;
+}
+
+/**
+ * coresight_get_enabled_sink - returns the first enabled sink using
+ * connection based search starting from the source reference
+ *
+ * @source: Coresight source device reference
+ */
+struct coresight_device *
+coresight_get_enabled_sink(struct coresight_device *source)
+{
+	if (!source)
+		return NULL;
+
+	return coresight_find_enabled_sink(source);
+}
+
+static int coresight_sink_by_id(struct device *dev, const void *data)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+	unsigned long hash;
+
+	if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	     csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+
+		if (!csdev->ea)
+			return 0;
+		/*
+		 * See function etm_perf_add_symlink_sink() to know where
+		 * this comes from.
+		 */
+		hash = (unsigned long)csdev->ea->var;
+
+		if ((u32)hash == *(u32 *)data)
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * coresight_get_sink_by_id - returns the sink that matches the id
+ * @id: Id of the sink to match
+ *
+ * The name of a sink is unique, whether it is found on the AMBA bus or
+ * otherwise.  As such the hash of that name can easily be used to identify
+ * a sink.
+ */
+struct coresight_device *coresight_get_sink_by_id(u32 id)
+{
+	struct device *dev = NULL;
+
+	dev = bus_find_device(&coresight_bustype, NULL, &id,
+			      coresight_sink_by_id);
+
+	return dev ? to_coresight_device(dev) : NULL;
+}
+
+/**
+ * coresight_get_ref- Helper function to increase reference count to module
+ * and device.
+ *
+ * @csdev: The coresight device to get a reference on.
+ *
+ * Return true in successful case and power up the device.
+ * Return false when failed to get reference of module.
+ */
+static inline bool coresight_get_ref(struct coresight_device *csdev)
+{
+	struct device *dev = csdev->dev.parent;
+
+	/* Make sure the driver can't be removed */
+	if (!try_module_get(dev->driver->owner))
+		return false;
+	/* Make sure the device can't go away */
+	get_device(dev);
+	pm_runtime_get_sync(dev);
+	return true;
+}
+
+/**
+ * coresight_put_ref- Helper function to decrease reference count to module
+ * and device. Power off the device.
+ *
+ * @csdev: The coresight device to decrement a reference from.
+ */
+static inline void coresight_put_ref(struct coresight_device *csdev)
+{
+	struct device *dev = csdev->dev.parent;
+
+	pm_runtime_put(dev);
+	put_device(dev);
+	module_put(dev->driver->owner);
+}
+
+/*
+ * coresight_grab_device - Power up this device and any of the helper
+ * devices connected to it for trace operation. Since the helper devices
+ * don't appear on the trace path, they should be handled along with the
+ * master device.
+ */
+static int coresight_grab_device(struct coresight_device *csdev)
+{
+	int i;
+
+	for (i = 0; i < csdev->pdata->nr_outconns; i++) {
+		struct coresight_device *child;
+
+		child = csdev->pdata->out_conns[i]->dest_dev;
+		if (child && coresight_is_helper(child))
+			if (!coresight_get_ref(child))
+				goto err;
+	}
+	if (coresight_get_ref(csdev))
+		return 0;
+err:
+	for (i--; i >= 0; i--) {
+		struct coresight_device *child;
+
+		child = csdev->pdata->out_conns[i]->dest_dev;
+		if (child && coresight_is_helper(child))
+			coresight_put_ref(child);
+	}
+	return -ENODEV;
+}
+
+/*
+ * coresight_drop_device - Release this device and any of the helper
+ * devices connected to it.
+ */
+static void coresight_drop_device(struct coresight_device *csdev)
+{
+	int i;
+
+	coresight_put_ref(csdev);
+	for (i = 0; i < csdev->pdata->nr_outconns; i++) {
+		struct coresight_device *child;
+
+		child = csdev->pdata->out_conns[i]->dest_dev;
+		if (child && coresight_is_helper(child))
+			coresight_put_ref(child);
+	}
+}
+
+/**
+ * _coresight_build_path - recursively build a path from a @csdev to a sink.
+ * @csdev:	The device to start from.
+ * @sink:	The final sink we want in this path.
+ * @path:	The list to add devices to.
+ *
+ * The tree of Coresight device is traversed until an activated sink is
+ * found.  From there the sink is added to the list along with all the
+ * devices that led to that point - the end result is a list from source
+ * to sink. In that list the source is the first device and the sink the
+ * last one.
+ */
+static int _coresight_build_path(struct coresight_device *csdev,
+				 struct coresight_device *sink,
+				 struct list_head *path)
+{
+	int i, ret;
+	bool found = false;
+	struct coresight_node *node;
+
+	/* An activated sink has been found.  Enqueue the element */
+	if (csdev == sink)
+		goto out;
+
+	if (coresight_is_percpu_source(csdev) && coresight_is_percpu_sink(sink) &&
+	    sink == per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev))) {
+		if (_coresight_build_path(sink, sink, path) == 0) {
+			found = true;
+			goto out;
+		}
+	}
+
+	/* Not a sink - recursively explore each port found on this element */
+	for (i = 0; i < csdev->pdata->nr_outconns; i++) {
+		struct coresight_device *child_dev;
+
+		child_dev = csdev->pdata->out_conns[i]->dest_dev;
+		if (child_dev &&
+		    _coresight_build_path(child_dev, sink, path) == 0) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return -ENODEV;
+
+out:
+	/*
+	 * A path from this element to a sink has been found.  The elements
+	 * leading to the sink are already enqueued, all that is left to do
+	 * is tell the PM runtime core we need this element and add a node
+	 * for it.
+	 */
+	ret = coresight_grab_device(csdev);
+	if (ret)
+		return ret;
+
+	node = kzalloc(sizeof(struct coresight_node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	node->csdev = csdev;
+	list_add(&node->link, path);
+
+	return 0;
+}
+
+struct list_head *coresight_build_path(struct coresight_device *source,
+				       struct coresight_device *sink)
+{
+	struct list_head *path;
+	int rc;
+
+	if (!sink)
+		return ERR_PTR(-EINVAL);
+
+	path = kzalloc(sizeof(struct list_head), GFP_KERNEL);
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(path);
+
+	rc = _coresight_build_path(source, sink, path);
+	if (rc) {
+		kfree(path);
+		return ERR_PTR(rc);
+	}
+
+	return path;
+}
+
+/**
+ * coresight_release_path - release a previously built path.
+ * @path:	the path to release.
+ *
+ * Go through all the elements of a path and 1) removed it from the list and
+ * 2) free the memory allocated for each node.
+ */
+void coresight_release_path(struct list_head *path)
+{
+	struct coresight_device *csdev;
+	struct coresight_node *nd, *next;
+
+	list_for_each_entry_safe(nd, next, path, link) {
+		csdev = nd->csdev;
+
+		coresight_drop_device(csdev);
+		list_del(&nd->link);
+		kfree(nd);
+	}
+
+	kfree(path);
+}
+
+/* return true if the device is a suitable type for a default sink */
+static inline bool coresight_is_def_sink_type(struct coresight_device *csdev)
+{
+	/* sink & correct subtype */
+	if (((csdev->type == CORESIGHT_DEV_TYPE_SINK) ||
+	     (csdev->type == CORESIGHT_DEV_TYPE_LINKSINK)) &&
+	    (csdev->subtype.sink_subtype >= CORESIGHT_DEV_SUBTYPE_SINK_BUFFER))
+		return true;
+	return false;
+}
+
+/**
+ * coresight_select_best_sink - return the best sink for use as default from
+ * the two provided.
+ *
+ * @sink:	current best sink.
+ * @depth:      search depth where current sink was found.
+ * @new_sink:	new sink for comparison with current sink.
+ * @new_depth:  search depth where new sink was found.
+ *
+ * Sinks prioritised according to coresight_dev_subtype_sink, with only
+ * subtypes CORESIGHT_DEV_SUBTYPE_SINK_BUFFER or higher being used.
+ *
+ * Where two sinks of equal priority are found, the sink closest to the
+ * source is used (smallest search depth).
+ *
+ * return @new_sink & update @depth if better than @sink, else return @sink.
+ */
+static struct coresight_device *
+coresight_select_best_sink(struct coresight_device *sink, int *depth,
+			   struct coresight_device *new_sink, int new_depth)
+{
+	bool update = false;
+
+	if (!sink) {
+		/* first found at this level */
+		update = true;
+	} else if (new_sink->subtype.sink_subtype >
+		   sink->subtype.sink_subtype) {
+		/* found better sink */
+		update = true;
+	} else if ((new_sink->subtype.sink_subtype ==
+		    sink->subtype.sink_subtype) &&
+		   (*depth > new_depth)) {
+		/* found same but closer sink */
+		update = true;
+	}
+
+	if (update)
+		*depth = new_depth;
+	return update ? new_sink : sink;
+}
+
+/**
+ * coresight_find_sink - recursive function to walk trace connections from
+ * source to find a suitable default sink.
+ *
+ * @csdev: source / current device to check.
+ * @depth: [in] search depth of calling dev, [out] depth of found sink.
+ *
+ * This will walk the connection path from a source (ETM) till a suitable
+ * sink is encountered and return that sink to the original caller.
+ *
+ * If current device is a plain sink return that & depth, otherwise recursively
+ * call child connections looking for a sink. Select best possible using
+ * coresight_select_best_sink.
+ *
+ * return best sink found, or NULL if not found at this node or child nodes.
+ */
+static struct coresight_device *
+coresight_find_sink(struct coresight_device *csdev, int *depth)
+{
+	int i, curr_depth = *depth + 1, found_depth = 0;
+	struct coresight_device *found_sink = NULL;
+
+	if (coresight_is_def_sink_type(csdev)) {
+		found_depth = curr_depth;
+		found_sink = csdev;
+		if (csdev->type == CORESIGHT_DEV_TYPE_SINK)
+			goto return_def_sink;
+		/* look past LINKSINK for something better */
+	}
+
+	/*
+	 * Not a sink we want - or possible child sink may be better.
+	 * recursively explore each port found on this element.
+	 */
+	for (i = 0; i < csdev->pdata->nr_outconns; i++) {
+		struct coresight_device *child_dev, *sink = NULL;
+		int child_depth = curr_depth;
+
+		child_dev = csdev->pdata->out_conns[i]->dest_dev;
+		if (child_dev)
+			sink = coresight_find_sink(child_dev, &child_depth);
+
+		if (sink)
+			found_sink = coresight_select_best_sink(found_sink,
+								&found_depth,
+								sink,
+								child_depth);
+	}
+
+return_def_sink:
+	/* return found sink and depth */
+	if (found_sink)
+		*depth = found_depth;
+	return found_sink;
+}
+
+/**
+ * coresight_find_default_sink: Find a sink suitable for use as a
+ * default sink.
+ *
+ * @csdev: starting source to find a connected sink.
+ *
+ * Walks connections graph looking for a suitable sink to enable for the
+ * supplied source. Uses CoreSight device subtypes and distance from source
+ * to select the best sink.
+ *
+ * If a sink is found, then the default sink for this device is set and
+ * will be automatically used in future.
+ *
+ * Used in cases where the CoreSight user (perf / sysfs) has not selected a
+ * sink.
+ */
+struct coresight_device *
+coresight_find_default_sink(struct coresight_device *csdev)
+{
+	int depth = 0;
+
+	/* look for a default sink if we have not found for this device */
+	if (!csdev->def_sink) {
+		if (coresight_is_percpu_source(csdev))
+			csdev->def_sink = per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev));
+		if (!csdev->def_sink)
+			csdev->def_sink = coresight_find_sink(csdev, &depth);
+	}
+	return csdev->def_sink;
+}
+
+static int coresight_remove_sink_ref(struct device *dev, void *data)
+{
+	struct coresight_device *sink = data;
+	struct coresight_device *source = to_coresight_device(dev);
+
+	if (source->def_sink == sink)
+		source->def_sink = NULL;
+	return 0;
+}
+
+/**
+ * coresight_clear_default_sink: Remove all default sink references to the
+ * supplied sink.
+ *
+ * If supplied device is a sink, then check all the bus devices and clear
+ * out all the references to this sink from the coresight_device def_sink
+ * parameter.
+ *
+ * @csdev: coresight sink - remove references to this from all sources.
+ */
+static void coresight_clear_default_sink(struct coresight_device *csdev)
+{
+	if ((csdev->type == CORESIGHT_DEV_TYPE_SINK) ||
+	    (csdev->type == CORESIGHT_DEV_TYPE_LINKSINK)) {
+		bus_for_each_dev(&coresight_bustype, NULL, csdev,
+				 coresight_remove_sink_ref);
+	}
+}
+
+/** coresight_validate_source - make sure a source has the right credentials
+ *  @csdev:	the device structure for a source.
+ *  @function:	the function this was called from.
+ *
+ * Assumes the coresight_mutex is held.
+ */
+static int coresight_validate_source(struct coresight_device *csdev,
+				     const char *function)
+{
+	u32 type, subtype;
+
+	type = csdev->type;
+	subtype = csdev->subtype.source_subtype;
+
+	if (type != CORESIGHT_DEV_TYPE_SOURCE) {
+		dev_err(&csdev->dev, "wrong device type in %s\n", function);
+		return -EINVAL;
+	}
+
+	if (subtype != CORESIGHT_DEV_SUBTYPE_SOURCE_PROC &&
+	    subtype != CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE &&
+	    subtype != CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS) {
+		dev_err(&csdev->dev, "wrong device subtype in %s\n", function);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int coresight_enable(struct coresight_device *csdev)
+{
+	int cpu, ret = 0;
+	struct coresight_device *sink;
+	struct list_head *path;
+	enum coresight_dev_subtype_source subtype;
+	u32 hash;
+
+	subtype = csdev->subtype.source_subtype;
+
+	mutex_lock(&coresight_mutex);
+
+	ret = coresight_validate_source(csdev, __func__);
+	if (ret)
+		goto out;
+
+	if (csdev->enable) {
+		/*
+		 * There could be multiple applications driving the software
+		 * source. So keep the refcount for each such user when the
+		 * source is already enabled.
+		 */
+		if (subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE)
+			atomic_inc(&csdev->refcnt);
+		goto out;
+	}
+
+	sink = coresight_get_enabled_sink(csdev);
+	if (!sink) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	path = coresight_build_path(csdev, sink);
+	if (IS_ERR(path)) {
+		pr_err("building path(s) failed\n");
+		ret = PTR_ERR(path);
+		goto out;
+	}
+
+	ret = coresight_enable_path(path, CS_MODE_SYSFS, NULL);
+	if (ret)
+		goto err_path;
+
+	ret = coresight_enable_source(csdev, CS_MODE_SYSFS, NULL);
+	if (ret)
+		goto err_source;
+
+	switch (subtype) {
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_PROC:
+		/*
+		 * When working from sysFS it is important to keep track
+		 * of the paths that were created so that they can be
+		 * undone in 'coresight_disable()'.  Since there can only
+		 * be a single session per tracer (when working from sysFS)
+		 * a per-cpu variable will do just fine.
+		 */
+		cpu = source_ops(csdev)->cpu_id(csdev);
+		per_cpu(tracer_path, cpu) = path;
+		break;
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE:
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS:
+		/*
+		 * Use the hash of source's device name as ID
+		 * and map the ID to the pointer of the path.
+		 */
+		hash = hashlen_hash(hashlen_string(NULL, dev_name(&csdev->dev)));
+		ret = idr_alloc_u32(&path_idr, path, &hash, hash, GFP_KERNEL);
+		if (ret)
+			goto err_source;
+		break;
+	default:
+		/* We can't be here */
+		break;
+	}
+
+out:
+	mutex_unlock(&coresight_mutex);
+	return ret;
+
+err_source:
+	coresight_disable_path(path);
+
+err_path:
+	coresight_release_path(path);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(coresight_enable);
+
+void coresight_disable(struct coresight_device *csdev)
+{
+	int cpu, ret;
+	struct list_head *path = NULL;
+	u32 hash;
+
+	mutex_lock(&coresight_mutex);
+
+	ret = coresight_validate_source(csdev, __func__);
+	if (ret)
+		goto out;
+
+	if (!csdev->enable || !coresight_disable_source(csdev, NULL))
+		goto out;
+
+	switch (csdev->subtype.source_subtype) {
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_PROC:
+		cpu = source_ops(csdev)->cpu_id(csdev);
+		path = per_cpu(tracer_path, cpu);
+		per_cpu(tracer_path, cpu) = NULL;
+		break;
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE:
+	case CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS:
+		hash = hashlen_hash(hashlen_string(NULL, dev_name(&csdev->dev)));
+		/* Find the path by the hash. */
+		path = idr_find(&path_idr, hash);
+		if (path == NULL) {
+			pr_err("Path is not found for %s\n", dev_name(&csdev->dev));
+			goto out;
+		}
+		idr_remove(&path_idr, hash);
+		break;
+	default:
+		/* We can't be here */
+		break;
+	}
+
+	coresight_disable_path(path);
+	coresight_release_path(path);
+
+out:
+	mutex_unlock(&coresight_mutex);
+}
+EXPORT_SYMBOL_GPL(coresight_disable);
+
+static ssize_t enable_sink_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", csdev->activated);
+}
+
+static ssize_t enable_sink_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val)
+		csdev->activated = true;
+	else
+		csdev->activated = false;
+
+	return size;
+
+}
+static DEVICE_ATTR_RW(enable_sink);
+
+static ssize_t enable_source_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", csdev->enable);
+}
+
+static ssize_t enable_source_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	int ret = 0;
+	unsigned long val;
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		ret = coresight_enable(csdev);
+		if (ret)
+			return ret;
+	} else {
+		coresight_disable(csdev);
+	}
+
+	return size;
+}
+static DEVICE_ATTR_RW(enable_source);
+
+static struct attribute *coresight_sink_attrs[] = {
+	&dev_attr_enable_sink.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_sink);
+
+static struct attribute *coresight_source_attrs[] = {
+	&dev_attr_enable_source.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_source);
+
+static struct device_type coresight_dev_type[] = {
+	{
+		.name = "sink",
+		.groups = coresight_sink_groups,
+	},
+	{
+		.name = "link",
+	},
+	{
+		.name = "linksink",
+		.groups = coresight_sink_groups,
+	},
+	{
+		.name = "source",
+		.groups = coresight_source_groups,
+	},
+	{
+		.name = "helper",
+	}
+};
+/* Ensure the enum matches the names and groups */
+static_assert(ARRAY_SIZE(coresight_dev_type) == CORESIGHT_DEV_TYPE_MAX);
+
+static void coresight_device_release(struct device *dev)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	fwnode_handle_put(csdev->dev.fwnode);
+	kfree(csdev);
+}
+
+static int coresight_orphan_match(struct device *dev, void *data)
+{
+	int i, ret = 0;
+	bool still_orphan = false;
+	struct coresight_device *dst_csdev = data;
+	struct coresight_device *src_csdev = to_coresight_device(dev);
+	struct coresight_connection *conn;
+	bool fixup_self = (src_csdev == dst_csdev);
+
+	/* Move on to another component if no connection is orphan */
+	if (!src_csdev->orphan)
+		return 0;
+	/*
+	 * Circle through all the connections of that component.  If we find
+	 * an orphan connection whose name matches @dst_csdev, link it.
+	 */
+	for (i = 0; i < src_csdev->pdata->nr_outconns; i++) {
+		conn = src_csdev->pdata->out_conns[i];
+
+		/* Skip the port if it's already connected. */
+		if (conn->dest_dev)
+			continue;
+
+		/*
+		 * If we are at the "new" device, which triggered this search,
+		 * we must find the remote device from the fwnode in the
+		 * connection.
+		 */
+		if (fixup_self)
+			dst_csdev = coresight_find_csdev_by_fwnode(
+				conn->dest_fwnode);
+
+		/* Does it match this newly added device? */
+		if (dst_csdev && conn->dest_fwnode == dst_csdev->dev.fwnode) {
+			ret = coresight_make_links(src_csdev, conn, dst_csdev);
+			if (ret)
+				return ret;
+
+			/*
+			 * Install the device connection. This also indicates that
+			 * the links are operational on both ends.
+			 */
+			conn->dest_dev = dst_csdev;
+			conn->src_dev = src_csdev;
+
+			ret = coresight_add_in_conn(conn);
+			if (ret)
+				return ret;
+		} else {
+			/* This component still has an orphan */
+			still_orphan = true;
+		}
+	}
+
+	src_csdev->orphan = still_orphan;
+
+	/*
+	 * Returning '0' in case we didn't encounter any error,
+	 * ensures that all known component on the bus will be checked.
+	 */
+	return 0;
+}
+
+static int coresight_fixup_orphan_conns(struct coresight_device *csdev)
+{
+	return bus_for_each_dev(&coresight_bustype, NULL,
+			 csdev, coresight_orphan_match);
+}
+
+/* coresight_remove_conns - Remove other device's references to this device */
+static void coresight_remove_conns(struct coresight_device *csdev)
+{
+	int i, j;
+	struct coresight_connection *conn;
+
+	/*
+	 * Remove the input connection references from the destination device
+	 * for each output connection.
+	 */
+	for (i = 0; i < csdev->pdata->nr_outconns; i++) {
+		conn = csdev->pdata->out_conns[i];
+		if (!conn->dest_dev)
+			continue;
+
+		for (j = 0; j < conn->dest_dev->pdata->nr_inconns; ++j)
+			if (conn->dest_dev->pdata->in_conns[j] == conn) {
+				conn->dest_dev->pdata->in_conns[j] = NULL;
+				break;
+			}
+	}
+
+	/*
+	 * For all input connections, remove references to this device.
+	 * Connection objects are shared so modifying this device's input
+	 * connections affects the other device's output connection.
+	 */
+	for (i = 0; i < csdev->pdata->nr_inconns; ++i) {
+		conn = csdev->pdata->in_conns[i];
+		/* Input conns array is sparse */
+		if (!conn)
+			continue;
+
+		conn->src_dev->orphan = true;
+		coresight_remove_links(conn->src_dev, conn);
+		conn->dest_dev = NULL;
+	}
+}
+
+/**
+ * coresight_timeout - loop until a bit has changed to a specific register
+ *			state.
+ * @csa: coresight device access for the device
+ * @offset: Offset of the register from the base of the device.
+ * @position: the position of the bit of interest.
+ * @value: the value the bit should have.
+ *
+ * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
+ * TIMEOUT_US has elapsed, which ever happens first.
+ */
+int coresight_timeout(struct csdev_access *csa, u32 offset,
+		      int position, int value)
+{
+	int i;
+	u32 val;
+
+	for (i = TIMEOUT_US; i > 0; i--) {
+		val = csdev_access_read32(csa, offset);
+		/* waiting on the bit to go from 0 to 1 */
+		if (value) {
+			if (val & BIT(position))
+				return 0;
+		/* waiting on the bit to go from 1 to 0 */
+		} else {
+			if (!(val & BIT(position)))
+				return 0;
+		}
+
+		/*
+		 * Delay is arbitrary - the specification doesn't say how long
+		 * we are expected to wait.  Extra check required to make sure
+		 * we don't wait needlessly on the last iteration.
+		 */
+		if (i - 1)
+			udelay(1);
+	}
+
+	return -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(coresight_timeout);
+
+u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset)
+{
+	return csdev_access_relaxed_read32(&csdev->access, offset);
+}
+
+u32 coresight_read32(struct coresight_device *csdev, u32 offset)
+{
+	return csdev_access_read32(&csdev->access, offset);
+}
+
+void coresight_relaxed_write32(struct coresight_device *csdev,
+			       u32 val, u32 offset)
+{
+	csdev_access_relaxed_write32(&csdev->access, val, offset);
+}
+
+void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset)
+{
+	csdev_access_write32(&csdev->access, val, offset);
+}
+
+u64 coresight_relaxed_read64(struct coresight_device *csdev, u32 offset)
+{
+	return csdev_access_relaxed_read64(&csdev->access, offset);
+}
+
+u64 coresight_read64(struct coresight_device *csdev, u32 offset)
+{
+	return csdev_access_read64(&csdev->access, offset);
+}
+
+void coresight_relaxed_write64(struct coresight_device *csdev,
+			       u64 val, u32 offset)
+{
+	csdev_access_relaxed_write64(&csdev->access, val, offset);
+}
+
+void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset)
+{
+	csdev_access_write64(&csdev->access, val, offset);
+}
+
+/*
+ * coresight_release_platform_data: Release references to the devices connected
+ * to the output port of this device.
+ */
+void coresight_release_platform_data(struct coresight_device *csdev,
+				     struct device *dev,
+				     struct coresight_platform_data *pdata)
+{
+	int i;
+	struct coresight_connection **conns = pdata->out_conns;
+
+	for (i = 0; i < pdata->nr_outconns; i++) {
+		/* If we have made the links, remove them now */
+		if (csdev && conns[i]->dest_dev)
+			coresight_remove_links(csdev, conns[i]);
+		/*
+		 * Drop the refcount and clear the handle as this device
+		 * is going away
+		 */
+		fwnode_handle_put(conns[i]->dest_fwnode);
+		conns[i]->dest_fwnode = NULL;
+		devm_kfree(dev, conns[i]);
+	}
+	devm_kfree(dev, pdata->out_conns);
+	devm_kfree(dev, pdata->in_conns);
+	devm_kfree(dev, pdata);
+	if (csdev)
+		coresight_remove_conns_sysfs_group(csdev);
+}
+
+struct coresight_device *coresight_register(struct coresight_desc *desc)
+{
+	int ret;
+	struct coresight_device *csdev;
+	bool registered = false;
+
+	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
+	if (!csdev) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+
+	csdev->pdata = desc->pdata;
+
+	csdev->type = desc->type;
+	csdev->subtype = desc->subtype;
+	csdev->ops = desc->ops;
+	csdev->access = desc->access;
+	csdev->orphan = true;
+
+	csdev->dev.type = &coresight_dev_type[desc->type];
+	csdev->dev.groups = desc->groups;
+	csdev->dev.parent = desc->dev;
+	csdev->dev.release = coresight_device_release;
+	csdev->dev.bus = &coresight_bustype;
+	/*
+	 * Hold the reference to our parent device. This will be
+	 * dropped only in coresight_device_release().
+	 */
+	csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
+	dev_set_name(&csdev->dev, "%s", desc->name);
+
+	/*
+	 * Make sure the device registration and the connection fixup
+	 * are synchronised, so that we don't see uninitialised devices
+	 * on the coresight bus while trying to resolve the connections.
+	 */
+	mutex_lock(&coresight_mutex);
+
+	ret = device_register(&csdev->dev);
+	if (ret) {
+		put_device(&csdev->dev);
+		/*
+		 * All resources are free'd explicitly via
+		 * coresight_device_release(), triggered from put_device().
+		 */
+		goto out_unlock;
+	}
+
+	if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	    csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+		ret = etm_perf_add_symlink_sink(csdev);
+
+		if (ret) {
+			device_unregister(&csdev->dev);
+			/*
+			 * As with the above, all resources are free'd
+			 * explicitly via coresight_device_release() triggered
+			 * from put_device(), which is in turn called from
+			 * function device_unregister().
+			 */
+			goto out_unlock;
+		}
+	}
+	/* Device is now registered */
+	registered = true;
+
+	ret = coresight_create_conns_sysfs_group(csdev);
+	if (!ret)
+		ret = coresight_fixup_orphan_conns(csdev);
+
+out_unlock:
+	mutex_unlock(&coresight_mutex);
+	/* Success */
+	if (!ret) {
+		if (cti_assoc_ops && cti_assoc_ops->add)
+			cti_assoc_ops->add(csdev);
+		return csdev;
+	}
+
+	/* Unregister the device if needed */
+	if (registered) {
+		coresight_unregister(csdev);
+		return ERR_PTR(ret);
+	}
+
+err_out:
+	/* Cleanup the connection information */
+	coresight_release_platform_data(NULL, desc->dev, desc->pdata);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(coresight_register);
+
+void coresight_unregister(struct coresight_device *csdev)
+{
+	etm_perf_del_symlink_sink(csdev);
+	/* Remove references of that device in the topology */
+	if (cti_assoc_ops && cti_assoc_ops->remove)
+		cti_assoc_ops->remove(csdev);
+	coresight_remove_conns(csdev);
+	coresight_clear_default_sink(csdev);
+	coresight_release_platform_data(csdev, csdev->dev.parent, csdev->pdata);
+	device_unregister(&csdev->dev);
+}
+EXPORT_SYMBOL_GPL(coresight_unregister);
+
+
+/*
+ * coresight_search_device_idx - Search the fwnode handle of a device
+ * in the given dev_idx list. Must be called with the coresight_mutex held.
+ *
+ * Returns the index of the entry, when found. Otherwise, -ENOENT.
+ */
+static inline int coresight_search_device_idx(struct coresight_dev_list *dict,
+					      struct fwnode_handle *fwnode)
+{
+	int i;
+
+	for (i = 0; i < dict->nr_idx; i++)
+		if (dict->fwnode_list[i] == fwnode)
+			return i;
+	return -ENOENT;
+}
+
+static bool coresight_compare_type(enum coresight_dev_type type_a,
+				   union coresight_dev_subtype subtype_a,
+				   enum coresight_dev_type type_b,
+				   union coresight_dev_subtype subtype_b)
+{
+	if (type_a != type_b)
+		return false;
+
+	switch (type_a) {
+	case CORESIGHT_DEV_TYPE_SINK:
+		return subtype_a.sink_subtype == subtype_b.sink_subtype;
+	case CORESIGHT_DEV_TYPE_LINK:
+		return subtype_a.link_subtype == subtype_b.link_subtype;
+	case CORESIGHT_DEV_TYPE_LINKSINK:
+		return subtype_a.link_subtype == subtype_b.link_subtype &&
+		       subtype_a.sink_subtype == subtype_b.sink_subtype;
+	case CORESIGHT_DEV_TYPE_SOURCE:
+		return subtype_a.source_subtype == subtype_b.source_subtype;
+	case CORESIGHT_DEV_TYPE_HELPER:
+		return subtype_a.helper_subtype == subtype_b.helper_subtype;
+	default:
+		return false;
+	}
+}
+
+struct coresight_device *
+coresight_find_input_type(struct coresight_platform_data *pdata,
+			  enum coresight_dev_type type,
+			  union coresight_dev_subtype subtype)
+{
+	int i;
+	struct coresight_connection *conn;
+
+	for (i = 0; i < pdata->nr_inconns; ++i) {
+		conn = pdata->in_conns[i];
+		if (conn &&
+		    coresight_compare_type(type, subtype, conn->src_dev->type,
+					   conn->src_dev->subtype))
+			return conn->src_dev;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(coresight_find_input_type);
+
+struct coresight_device *
+coresight_find_output_type(struct coresight_platform_data *pdata,
+			   enum coresight_dev_type type,
+			   union coresight_dev_subtype subtype)
+{
+	int i;
+	struct coresight_connection *conn;
+
+	for (i = 0; i < pdata->nr_outconns; ++i) {
+		conn = pdata->out_conns[i];
+		if (conn->dest_dev &&
+		    coresight_compare_type(type, subtype, conn->dest_dev->type,
+					   conn->dest_dev->subtype))
+			return conn->dest_dev;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(coresight_find_output_type);
+
+bool coresight_loses_context_with_cpu(struct device *dev)
+{
+	return fwnode_property_present(dev_fwnode(dev),
+				       "arm,coresight-loses-context-with-cpu");
+}
+EXPORT_SYMBOL_GPL(coresight_loses_context_with_cpu);
+
+/*
+ * coresight_alloc_device_name - Get an index for a given device in the
+ * device index list specific to a driver. An index is allocated for a
+ * device and is tracked with the fwnode_handle to prevent allocating
+ * duplicate indices for the same device (e.g, if we defer probing of
+ * a device due to dependencies), in case the index is requested again.
+ */
+char *coresight_alloc_device_name(struct coresight_dev_list *dict,
+				  struct device *dev)
+{
+	int idx;
+	char *name = NULL;
+	struct fwnode_handle **list;
+
+	mutex_lock(&coresight_mutex);
+
+	idx = coresight_search_device_idx(dict, dev_fwnode(dev));
+	if (idx < 0) {
+		/* Make space for the new entry */
+		idx = dict->nr_idx;
+		list = krealloc_array(dict->fwnode_list,
+				      idx + 1, sizeof(*dict->fwnode_list),
+				      GFP_KERNEL);
+		if (ZERO_OR_NULL_PTR(list)) {
+			idx = -ENOMEM;
+			goto done;
+		}
+
+		list[idx] = dev_fwnode(dev);
+		dict->fwnode_list = list;
+		dict->nr_idx = idx + 1;
+	}
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", dict->pfx, idx);
+done:
+	mutex_unlock(&coresight_mutex);
+	return name;
+}
+EXPORT_SYMBOL_GPL(coresight_alloc_device_name);
+
+struct bus_type coresight_bustype = {
+	.name	= "coresight",
+};
+
+static int __init coresight_init(void)
+{
+	int ret;
+
+	ret = bus_register(&coresight_bustype);
+	if (ret)
+		return ret;
+
+	ret = etm_perf_init();
+	if (ret)
+		goto exit_bus_unregister;
+
+	/* initialise the coresight syscfg API */
+	ret = cscfg_init();
+	if (!ret)
+		return 0;
+
+	etm_perf_exit();
+exit_bus_unregister:
+	bus_unregister(&coresight_bustype);
+	return ret;
+}
+
+static void __exit coresight_exit(void)
+{
+	cscfg_exit();
+	etm_perf_exit();
+	bus_unregister(&coresight_bustype);
+}
+
+module_init(coresight_init);
+module_exit(coresight_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_AUTHOR("Mathieu Poirier <mathieu.poirier@linaro.org>");
+MODULE_DESCRIPTION("Arm CoreSight tracer driver");
diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c
new file mode 100644
index 0000000000..1874df7c6a
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017 Linaro Limited. All rights reserved.
+ *
+ * Author: Leo Yan <leo.yan@linaro.org>
+ */
+#include <linux/amba/bus.h>
+#include <linux/coresight.h>
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/panic_notifier.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "coresight-priv.h"
+
+#define EDPCSR				0x0A0
+#define EDCIDSR				0x0A4
+#define EDVIDSR				0x0A8
+#define EDPCSR_HI			0x0AC
+#define EDOSLAR				0x300
+#define EDPRCR				0x310
+#define EDPRSR				0x314
+#define EDDEVID1			0xFC4
+#define EDDEVID				0xFC8
+
+#define EDPCSR_PROHIBITED		0xFFFFFFFF
+
+/* bits definition for EDPCSR */
+#define EDPCSR_THUMB			BIT(0)
+#define EDPCSR_ARM_INST_MASK		GENMASK(31, 2)
+#define EDPCSR_THUMB_INST_MASK		GENMASK(31, 1)
+
+/* bits definition for EDPRCR */
+#define EDPRCR_COREPURQ			BIT(3)
+#define EDPRCR_CORENPDRQ		BIT(0)
+
+/* bits definition for EDPRSR */
+#define EDPRSR_DLK			BIT(6)
+#define EDPRSR_PU			BIT(0)
+
+/* bits definition for EDVIDSR */
+#define EDVIDSR_NS			BIT(31)
+#define EDVIDSR_E2			BIT(30)
+#define EDVIDSR_E3			BIT(29)
+#define EDVIDSR_HV			BIT(28)
+#define EDVIDSR_VMID			GENMASK(7, 0)
+
+/*
+ * bits definition for EDDEVID1:PSCROffset
+ *
+ * NOTE: armv8 and armv7 have different definition for the register,
+ * so consolidate the bits definition as below:
+ *
+ * 0b0000 - Sample offset applies based on the instruction state, we
+ *          rely on EDDEVID to check if EDPCSR is implemented or not
+ * 0b0001 - No offset applies.
+ * 0b0010 - No offset applies, but do not use in AArch32 mode
+ *
+ */
+#define EDDEVID1_PCSR_OFFSET_MASK	GENMASK(3, 0)
+#define EDDEVID1_PCSR_OFFSET_INS_SET	(0x0)
+#define EDDEVID1_PCSR_NO_OFFSET_DIS_AARCH32	(0x2)
+
+/* bits definition for EDDEVID */
+#define EDDEVID_PCSAMPLE_MODE		GENMASK(3, 0)
+#define EDDEVID_IMPL_EDPCSR		(0x1)
+#define EDDEVID_IMPL_EDPCSR_EDCIDSR	(0x2)
+#define EDDEVID_IMPL_FULL		(0x3)
+
+#define DEBUG_WAIT_SLEEP		1000
+#define DEBUG_WAIT_TIMEOUT		32000
+
+struct debug_drvdata {
+	void __iomem	*base;
+	struct device	*dev;
+	int		cpu;
+
+	bool		edpcsr_present;
+	bool		edcidsr_present;
+	bool		edvidsr_present;
+	bool		pc_has_offset;
+
+	u32		edpcsr;
+	u32		edpcsr_hi;
+	u32		edprsr;
+	u32		edvidsr;
+	u32		edcidsr;
+};
+
+static DEFINE_MUTEX(debug_lock);
+static DEFINE_PER_CPU(struct debug_drvdata *, debug_drvdata);
+static int debug_count;
+static struct dentry *debug_debugfs_dir;
+
+static bool debug_enable = IS_ENABLED(CONFIG_CORESIGHT_CPU_DEBUG_DEFAULT_ON);
+module_param_named(enable, debug_enable, bool, 0600);
+MODULE_PARM_DESC(enable, "Control to enable coresight CPU debug functionality");
+
+static void debug_os_unlock(struct debug_drvdata *drvdata)
+{
+	/* Unlocks the debug registers */
+	writel_relaxed(0x0, drvdata->base + EDOSLAR);
+
+	/* Make sure the registers are unlocked before accessing */
+	wmb();
+}
+
+/*
+ * According to ARM DDI 0487A.k, before access external debug
+ * registers should firstly check the access permission; if any
+ * below condition has been met then cannot access debug
+ * registers to avoid lockup issue:
+ *
+ * - CPU power domain is powered off;
+ * - The OS Double Lock is locked;
+ *
+ * By checking EDPRSR can get to know if meet these conditions.
+ */
+static bool debug_access_permitted(struct debug_drvdata *drvdata)
+{
+	/* CPU is powered off */
+	if (!(drvdata->edprsr & EDPRSR_PU))
+		return false;
+
+	/* The OS Double Lock is locked */
+	if (drvdata->edprsr & EDPRSR_DLK)
+		return false;
+
+	return true;
+}
+
+static void debug_force_cpu_powered_up(struct debug_drvdata *drvdata)
+{
+	u32 edprcr;
+
+try_again:
+
+	/*
+	 * Send request to power management controller and assert
+	 * DBGPWRUPREQ signal; if power management controller has
+	 * sane implementation, it should enable CPU power domain
+	 * in case CPU is in low power state.
+	 */
+	edprcr = readl_relaxed(drvdata->base + EDPRCR);
+	edprcr |= EDPRCR_COREPURQ;
+	writel_relaxed(edprcr, drvdata->base + EDPRCR);
+
+	/* Wait for CPU to be powered up (timeout~=32ms) */
+	if (readx_poll_timeout_atomic(readl_relaxed, drvdata->base + EDPRSR,
+			drvdata->edprsr, (drvdata->edprsr & EDPRSR_PU),
+			DEBUG_WAIT_SLEEP, DEBUG_WAIT_TIMEOUT)) {
+		/*
+		 * Unfortunately the CPU cannot be powered up, so return
+		 * back and later has no permission to access other
+		 * registers. For this case, should disable CPU low power
+		 * states to ensure CPU power domain is enabled!
+		 */
+		dev_err(drvdata->dev, "%s: power up request for CPU%d failed\n",
+			__func__, drvdata->cpu);
+		return;
+	}
+
+	/*
+	 * At this point the CPU is powered up, so set the no powerdown
+	 * request bit so we don't lose power and emulate power down.
+	 */
+	edprcr = readl_relaxed(drvdata->base + EDPRCR);
+	edprcr |= EDPRCR_COREPURQ | EDPRCR_CORENPDRQ;
+	writel_relaxed(edprcr, drvdata->base + EDPRCR);
+
+	drvdata->edprsr = readl_relaxed(drvdata->base + EDPRSR);
+
+	/* The core power domain got switched off on use, try again */
+	if (unlikely(!(drvdata->edprsr & EDPRSR_PU)))
+		goto try_again;
+}
+
+static void debug_read_regs(struct debug_drvdata *drvdata)
+{
+	u32 save_edprcr;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Unlock os lock */
+	debug_os_unlock(drvdata);
+
+	/* Save EDPRCR register */
+	save_edprcr = readl_relaxed(drvdata->base + EDPRCR);
+
+	/*
+	 * Ensure CPU power domain is enabled to let registers
+	 * are accessiable.
+	 */
+	debug_force_cpu_powered_up(drvdata);
+
+	if (!debug_access_permitted(drvdata))
+		goto out;
+
+	drvdata->edpcsr = readl_relaxed(drvdata->base + EDPCSR);
+
+	/*
+	 * As described in ARM DDI 0487A.k, if the processing
+	 * element (PE) is in debug state, or sample-based
+	 * profiling is prohibited, EDPCSR reads as 0xFFFFFFFF;
+	 * EDCIDSR, EDVIDSR and EDPCSR_HI registers also become
+	 * UNKNOWN state. So directly bail out for this case.
+	 */
+	if (drvdata->edpcsr == EDPCSR_PROHIBITED)
+		goto out;
+
+	/*
+	 * A read of the EDPCSR normally has the side-effect of
+	 * indirectly writing to EDCIDSR, EDVIDSR and EDPCSR_HI;
+	 * at this point it's safe to read value from them.
+	 */
+	if (IS_ENABLED(CONFIG_64BIT))
+		drvdata->edpcsr_hi = readl_relaxed(drvdata->base + EDPCSR_HI);
+
+	if (drvdata->edcidsr_present)
+		drvdata->edcidsr = readl_relaxed(drvdata->base + EDCIDSR);
+
+	if (drvdata->edvidsr_present)
+		drvdata->edvidsr = readl_relaxed(drvdata->base + EDVIDSR);
+
+out:
+	/* Restore EDPRCR register */
+	writel_relaxed(save_edprcr, drvdata->base + EDPRCR);
+
+	CS_LOCK(drvdata->base);
+}
+
+#ifdef CONFIG_64BIT
+static unsigned long debug_adjust_pc(struct debug_drvdata *drvdata)
+{
+	return (unsigned long)drvdata->edpcsr_hi << 32 |
+	       (unsigned long)drvdata->edpcsr;
+}
+#else
+static unsigned long debug_adjust_pc(struct debug_drvdata *drvdata)
+{
+	unsigned long arm_inst_offset = 0, thumb_inst_offset = 0;
+	unsigned long pc;
+
+	pc = (unsigned long)drvdata->edpcsr;
+
+	if (drvdata->pc_has_offset) {
+		arm_inst_offset = 8;
+		thumb_inst_offset = 4;
+	}
+
+	/* Handle thumb instruction */
+	if (pc & EDPCSR_THUMB) {
+		pc = (pc & EDPCSR_THUMB_INST_MASK) - thumb_inst_offset;
+		return pc;
+	}
+
+	/*
+	 * Handle arm instruction offset, if the arm instruction
+	 * is not 4 byte alignment then it's possible the case
+	 * for implementation defined; keep original value for this
+	 * case and print info for notice.
+	 */
+	if (pc & BIT(1))
+		dev_emerg(drvdata->dev,
+			  "Instruction offset is implementation defined\n");
+	else
+		pc = (pc & EDPCSR_ARM_INST_MASK) - arm_inst_offset;
+
+	return pc;
+}
+#endif
+
+static void debug_dump_regs(struct debug_drvdata *drvdata)
+{
+	struct device *dev = drvdata->dev;
+	unsigned long pc;
+
+	dev_emerg(dev, " EDPRSR:  %08x (Power:%s DLK:%s)\n",
+		  drvdata->edprsr,
+		  drvdata->edprsr & EDPRSR_PU ? "On" : "Off",
+		  drvdata->edprsr & EDPRSR_DLK ? "Lock" : "Unlock");
+
+	if (!debug_access_permitted(drvdata)) {
+		dev_emerg(dev, "No permission to access debug registers!\n");
+		return;
+	}
+
+	if (drvdata->edpcsr == EDPCSR_PROHIBITED) {
+		dev_emerg(dev, "CPU is in Debug state or profiling is prohibited!\n");
+		return;
+	}
+
+	pc = debug_adjust_pc(drvdata);
+	dev_emerg(dev, " EDPCSR:  %pS\n", (void *)pc);
+
+	if (drvdata->edcidsr_present)
+		dev_emerg(dev, " EDCIDSR: %08x\n", drvdata->edcidsr);
+
+	if (drvdata->edvidsr_present)
+		dev_emerg(dev, " EDVIDSR: %08x (State:%s Mode:%s Width:%dbits VMID:%x)\n",
+			  drvdata->edvidsr,
+			  drvdata->edvidsr & EDVIDSR_NS ?
+			  "Non-secure" : "Secure",
+			  drvdata->edvidsr & EDVIDSR_E3 ? "EL3" :
+				(drvdata->edvidsr & EDVIDSR_E2 ?
+				 "EL2" : "EL1/0"),
+			  drvdata->edvidsr & EDVIDSR_HV ? 64 : 32,
+			  drvdata->edvidsr & (u32)EDVIDSR_VMID);
+}
+
+static void debug_init_arch_data(void *info)
+{
+	struct debug_drvdata *drvdata = info;
+	u32 mode, pcsr_offset;
+	u32 eddevid, eddevid1;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Read device info */
+	eddevid  = readl_relaxed(drvdata->base + EDDEVID);
+	eddevid1 = readl_relaxed(drvdata->base + EDDEVID1);
+
+	CS_LOCK(drvdata->base);
+
+	/* Parse implementation feature */
+	mode = eddevid & EDDEVID_PCSAMPLE_MODE;
+	pcsr_offset = eddevid1 & EDDEVID1_PCSR_OFFSET_MASK;
+
+	drvdata->edpcsr_present  = false;
+	drvdata->edcidsr_present = false;
+	drvdata->edvidsr_present = false;
+	drvdata->pc_has_offset   = false;
+
+	switch (mode) {
+	case EDDEVID_IMPL_FULL:
+		drvdata->edvidsr_present = true;
+		fallthrough;
+	case EDDEVID_IMPL_EDPCSR_EDCIDSR:
+		drvdata->edcidsr_present = true;
+		fallthrough;
+	case EDDEVID_IMPL_EDPCSR:
+		/*
+		 * In ARM DDI 0487A.k, the EDDEVID1.PCSROffset is used to
+		 * define if has the offset for PC sampling value; if read
+		 * back EDDEVID1.PCSROffset == 0x2, then this means the debug
+		 * module does not sample the instruction set state when
+		 * armv8 CPU in AArch32 state.
+		 */
+		drvdata->edpcsr_present =
+			((IS_ENABLED(CONFIG_64BIT) && pcsr_offset != 0) ||
+			 (pcsr_offset != EDDEVID1_PCSR_NO_OFFSET_DIS_AARCH32));
+
+		drvdata->pc_has_offset =
+			(pcsr_offset == EDDEVID1_PCSR_OFFSET_INS_SET);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Dump out information on panic.
+ */
+static int debug_notifier_call(struct notifier_block *self,
+			       unsigned long v, void *p)
+{
+	int cpu;
+	struct debug_drvdata *drvdata;
+
+	/* Bail out if we can't acquire the mutex or the functionality is off */
+	if (!mutex_trylock(&debug_lock))
+		return NOTIFY_DONE;
+
+	if (!debug_enable)
+		goto skip_dump;
+
+	pr_emerg("ARM external debug module:\n");
+
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		dev_emerg(drvdata->dev, "CPU[%d]:\n", drvdata->cpu);
+
+		debug_read_regs(drvdata);
+		debug_dump_regs(drvdata);
+	}
+
+skip_dump:
+	mutex_unlock(&debug_lock);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block debug_notifier = {
+	.notifier_call = debug_notifier_call,
+};
+
+static int debug_enable_func(void)
+{
+	struct debug_drvdata *drvdata;
+	int cpu, ret = 0;
+	cpumask_t mask;
+
+	/*
+	 * Use cpumask to track which debug power domains have
+	 * been powered on and use it to handle failure case.
+	 */
+	cpumask_clear(&mask);
+
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		ret = pm_runtime_get_sync(drvdata->dev);
+		if (ret < 0)
+			goto err;
+		else
+			cpumask_set_cpu(cpu, &mask);
+	}
+
+	return 0;
+
+err:
+	/*
+	 * If pm_runtime_get_sync() has failed, need rollback on
+	 * all the other CPUs that have been enabled before that.
+	 */
+	for_each_cpu(cpu, &mask) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		pm_runtime_put_noidle(drvdata->dev);
+	}
+
+	return ret;
+}
+
+static int debug_disable_func(void)
+{
+	struct debug_drvdata *drvdata;
+	int cpu, ret, err = 0;
+
+	/*
+	 * Disable debug power domains, records the error and keep
+	 * circling through all other CPUs when an error has been
+	 * encountered.
+	 */
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		ret = pm_runtime_put(drvdata->dev);
+		if (ret < 0)
+			err = ret;
+	}
+
+	return err;
+}
+
+static ssize_t debug_func_knob_write(struct file *f,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	u8 val;
+	int ret;
+
+	ret = kstrtou8_from_user(buf, count, 2, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&debug_lock);
+
+	if (val == debug_enable)
+		goto out;
+
+	if (val)
+		ret = debug_enable_func();
+	else
+		ret = debug_disable_func();
+
+	if (ret) {
+		pr_err("%s: unable to %s debug function: %d\n",
+		       __func__, val ? "enable" : "disable", ret);
+		goto err;
+	}
+
+	debug_enable = val;
+out:
+	ret = count;
+err:
+	mutex_unlock(&debug_lock);
+	return ret;
+}
+
+static ssize_t debug_func_knob_read(struct file *f,
+		char __user *ubuf, size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+	char buf[3];
+
+	mutex_lock(&debug_lock);
+	snprintf(buf, sizeof(buf), "%d\n", debug_enable);
+	mutex_unlock(&debug_lock);
+
+	ret = simple_read_from_buffer(ubuf, count, ppos, buf, sizeof(buf));
+	return ret;
+}
+
+static const struct file_operations debug_func_knob_fops = {
+	.open	= simple_open,
+	.read	= debug_func_knob_read,
+	.write	= debug_func_knob_write,
+};
+
+static int debug_func_init(void)
+{
+	int ret;
+
+	/* Create debugfs node */
+	debug_debugfs_dir = debugfs_create_dir("coresight_cpu_debug", NULL);
+	debugfs_create_file("enable", 0644, debug_debugfs_dir, NULL,
+			    &debug_func_knob_fops);
+
+	/* Register function to be called for panic */
+	ret = atomic_notifier_chain_register(&panic_notifier_list,
+					     &debug_notifier);
+	if (ret) {
+		pr_err("%s: unable to register notifier: %d\n",
+		       __func__, ret);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	debugfs_remove_recursive(debug_debugfs_dir);
+	return ret;
+}
+
+static void debug_func_exit(void)
+{
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+					 &debug_notifier);
+	debugfs_remove_recursive(debug_debugfs_dir);
+}
+
+static int debug_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct debug_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	int ret;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->cpu = coresight_get_cpu(dev);
+	if (drvdata->cpu < 0)
+		return drvdata->cpu;
+
+	if (per_cpu(debug_drvdata, drvdata->cpu)) {
+		dev_err(dev, "CPU%d drvdata has already been initialized\n",
+			drvdata->cpu);
+		return -EBUSY;
+	}
+
+	drvdata->dev = &adev->dev;
+	amba_set_drvdata(adev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	cpus_read_lock();
+	per_cpu(debug_drvdata, drvdata->cpu) = drvdata;
+	ret = smp_call_function_single(drvdata->cpu, debug_init_arch_data,
+				       drvdata, 1);
+	cpus_read_unlock();
+
+	if (ret) {
+		dev_err(dev, "CPU%d debug arch init failed\n", drvdata->cpu);
+		goto err;
+	}
+
+	if (!drvdata->edpcsr_present) {
+		dev_err(dev, "CPU%d sample-based profiling isn't implemented\n",
+			drvdata->cpu);
+		ret = -ENXIO;
+		goto err;
+	}
+
+	if (!debug_count++) {
+		ret = debug_func_init();
+		if (ret)
+			goto err_func_init;
+	}
+
+	mutex_lock(&debug_lock);
+	/* Turn off debug power domain if debugging is disabled */
+	if (!debug_enable)
+		pm_runtime_put(dev);
+	mutex_unlock(&debug_lock);
+
+	dev_info(dev, "Coresight debug-CPU%d initialized\n", drvdata->cpu);
+	return 0;
+
+err_func_init:
+	debug_count--;
+err:
+	per_cpu(debug_drvdata, drvdata->cpu) = NULL;
+	return ret;
+}
+
+static void debug_remove(struct amba_device *adev)
+{
+	struct device *dev = &adev->dev;
+	struct debug_drvdata *drvdata = amba_get_drvdata(adev);
+
+	per_cpu(debug_drvdata, drvdata->cpu) = NULL;
+
+	mutex_lock(&debug_lock);
+	/* Turn off debug power domain before rmmod the module */
+	if (debug_enable)
+		pm_runtime_put(dev);
+	mutex_unlock(&debug_lock);
+
+	if (!--debug_count)
+		debug_func_exit();
+}
+
+static const struct amba_cs_uci_id uci_id_debug[] = {
+	{
+		/*  CPU Debug UCI data */
+		.devarch	= 0x47706a15,
+		.devarch_mask	= 0xfff0ffff,
+		.devtype	= 0x00000015,
+	}
+};
+
+static const struct amba_id debug_ids[] = {
+	CS_AMBA_ID(0x000bbd03),				/* Cortex-A53 */
+	CS_AMBA_ID(0x000bbd07),				/* Cortex-A57 */
+	CS_AMBA_ID(0x000bbd08),				/* Cortex-A72 */
+	CS_AMBA_ID(0x000bbd09),				/* Cortex-A73 */
+	CS_AMBA_UCI_ID(0x000f0205, uci_id_debug),	/* Qualcomm Kryo */
+	CS_AMBA_UCI_ID(0x000f0211, uci_id_debug),	/* Qualcomm Kryo */
+	{},
+};
+
+MODULE_DEVICE_TABLE(amba, debug_ids);
+
+static struct amba_driver debug_driver = {
+	.drv = {
+		.name   = "coresight-cpu-debug",
+		.suppress_bind_attrs = true,
+	},
+	.probe		= debug_probe,
+	.remove		= debug_remove,
+	.id_table	= debug_ids,
+};
+
+module_amba_driver(debug_driver);
+
+MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>");
+MODULE_DESCRIPTION("ARM Coresight CPU Debug Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c
new file mode 100644
index 0000000000..3999d0a2cb
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cti-core.c
@@ -0,0 +1,1015 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/atomic.h>
+#include <linux/bits.h>
+#include <linux/coresight.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpuhotplug.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/spinlock.h>
+
+#include "coresight-priv.h"
+#include "coresight-cti.h"
+
+/*
+ * CTI devices can be associated with a PE, or be connected to CoreSight
+ * hardware. We have a list of all CTIs irrespective of CPU bound or
+ * otherwise.
+ *
+ * We assume that the non-CPU CTIs are always powered as we do with sinks etc.
+ *
+ * We leave the client to figure out if all the CTIs are interconnected with
+ * the same CTM, in general this is the case but does not always have to be.
+ */
+
+/* net of CTI devices connected via CTM */
+static LIST_HEAD(ect_net);
+
+/* protect the list */
+static DEFINE_MUTEX(ect_mutex);
+
+#define csdev_to_cti_drvdata(csdev)	\
+	dev_get_drvdata(csdev->dev.parent)
+
+/* power management handling */
+static int nr_cti_cpu;
+
+/* quick lookup list for CPU bound CTIs when power handling */
+static struct cti_drvdata *cti_cpu_drvdata[NR_CPUS];
+
+/*
+ * CTI naming. CTI bound to cores will have the name cti_cpu<N> where
+ * N is the CPU ID. System CTIs will have the name cti_sys<I> where I
+ * is an index allocated by order of discovery.
+ *
+ * CTI device name list - for CTI not bound to cores.
+ */
+DEFINE_CORESIGHT_DEVLIST(cti_sys_devs, "cti_sys");
+
+/* write set of regs to hardware - call with spinlock claimed */
+void cti_write_all_hw_regs(struct cti_drvdata *drvdata)
+{
+	struct cti_config *config = &drvdata->config;
+	int i;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* disable CTI before writing registers */
+	writel_relaxed(0, drvdata->base + CTICONTROL);
+
+	/* write the CTI trigger registers */
+	for (i = 0; i < config->nr_trig_max; i++) {
+		writel_relaxed(config->ctiinen[i], drvdata->base + CTIINEN(i));
+		writel_relaxed(config->ctiouten[i],
+			       drvdata->base + CTIOUTEN(i));
+	}
+
+	/* other regs */
+	writel_relaxed(config->ctigate, drvdata->base + CTIGATE);
+	writel_relaxed(config->asicctl, drvdata->base + ASICCTL);
+	writel_relaxed(config->ctiappset, drvdata->base + CTIAPPSET);
+
+	/* re-enable CTI */
+	writel_relaxed(1, drvdata->base + CTICONTROL);
+
+	CS_LOCK(drvdata->base);
+}
+
+/* write regs to hardware and enable */
+static int cti_enable_hw(struct cti_drvdata *drvdata)
+{
+	struct cti_config *config = &drvdata->config;
+	unsigned long flags;
+	int rc = 0;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* no need to do anything if enabled or unpowered*/
+	if (config->hw_enabled || !config->hw_powered)
+		goto cti_state_unchanged;
+
+	/* claim the device */
+	rc = coresight_claim_device(drvdata->csdev);
+	if (rc)
+		goto cti_err_not_enabled;
+
+	cti_write_all_hw_regs(drvdata);
+
+	config->hw_enabled = true;
+	drvdata->config.enable_req_count++;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return rc;
+
+cti_state_unchanged:
+	drvdata->config.enable_req_count++;
+
+	/* cannot enable due to error */
+cti_err_not_enabled:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return rc;
+}
+
+/* re-enable CTI on CPU when using CPU hotplug */
+static void cti_cpuhp_enable_hw(struct cti_drvdata *drvdata)
+{
+	struct cti_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	config->hw_powered = true;
+
+	/* no need to do anything if no enable request */
+	if (!drvdata->config.enable_req_count)
+		goto cti_hp_not_enabled;
+
+	/* try to claim the device */
+	if (coresight_claim_device(drvdata->csdev))
+		goto cti_hp_not_enabled;
+
+	cti_write_all_hw_regs(drvdata);
+	config->hw_enabled = true;
+	spin_unlock(&drvdata->spinlock);
+	return;
+
+	/* did not re-enable due to no claim / no request */
+cti_hp_not_enabled:
+	spin_unlock(&drvdata->spinlock);
+}
+
+/* disable hardware */
+static int cti_disable_hw(struct cti_drvdata *drvdata)
+{
+	struct cti_config *config = &drvdata->config;
+	struct coresight_device *csdev = drvdata->csdev;
+	int ret = 0;
+
+	spin_lock(&drvdata->spinlock);
+
+	/* don't allow negative refcounts, return an error */
+	if (!drvdata->config.enable_req_count) {
+		ret = -EINVAL;
+		goto cti_not_disabled;
+	}
+
+	/* check refcount - disable on 0 */
+	if (--drvdata->config.enable_req_count > 0)
+		goto cti_not_disabled;
+
+	/* no need to do anything if disabled or cpu unpowered */
+	if (!config->hw_enabled || !config->hw_powered)
+		goto cti_not_disabled;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* disable CTI */
+	writel_relaxed(0, drvdata->base + CTICONTROL);
+	config->hw_enabled = false;
+
+	coresight_disclaim_device_unlocked(csdev);
+	CS_LOCK(drvdata->base);
+	spin_unlock(&drvdata->spinlock);
+	return ret;
+
+	/* not disabled this call */
+cti_not_disabled:
+	spin_unlock(&drvdata->spinlock);
+	return ret;
+}
+
+void cti_write_single_reg(struct cti_drvdata *drvdata, int offset, u32 value)
+{
+	CS_UNLOCK(drvdata->base);
+	writel_relaxed(value, drvdata->base + offset);
+	CS_LOCK(drvdata->base);
+}
+
+void cti_write_intack(struct device *dev, u32 ackval)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	/* write if enabled */
+	if (cti_active(config))
+		cti_write_single_reg(drvdata, CTIINTACK, ackval);
+	spin_unlock(&drvdata->spinlock);
+}
+
+/*
+ * Look at the HW DEVID register for some of the HW settings.
+ * DEVID[15:8] - max number of in / out triggers.
+ */
+#define CTI_DEVID_MAXTRIGS(devid_val) ((int) BMVAL(devid_val, 8, 15))
+
+/* DEVID[19:16] - number of CTM channels */
+#define CTI_DEVID_CTMCHANNELS(devid_val) ((int) BMVAL(devid_val, 16, 19))
+
+static void cti_set_default_config(struct device *dev,
+				   struct cti_drvdata *drvdata)
+{
+	struct cti_config *config = &drvdata->config;
+	u32 devid;
+
+	devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID);
+	config->nr_trig_max = CTI_DEVID_MAXTRIGS(devid);
+
+	/*
+	 * no current hardware should exceed this, but protect the driver
+	 * in case of fault / out of spec hw
+	 */
+	if (config->nr_trig_max > CTIINOUTEN_MAX) {
+		dev_warn_once(dev,
+			"Limiting HW MaxTrig value(%d) to driver max(%d)\n",
+			config->nr_trig_max, CTIINOUTEN_MAX);
+		config->nr_trig_max = CTIINOUTEN_MAX;
+	}
+
+	config->nr_ctm_channels = CTI_DEVID_CTMCHANNELS(devid);
+
+	/* Most regs default to 0 as zalloc'ed except...*/
+	config->trig_filter_enable = true;
+	config->ctigate = GENMASK(config->nr_ctm_channels - 1, 0);
+	config->enable_req_count = 0;
+}
+
+/*
+ * Add a connection entry to the list of connections for this
+ * CTI device.
+ */
+int cti_add_connection_entry(struct device *dev, struct cti_drvdata *drvdata,
+			     struct cti_trig_con *tc,
+			     struct coresight_device *csdev,
+			     const char *assoc_dev_name)
+{
+	struct cti_device *cti_dev = &drvdata->ctidev;
+
+	tc->con_dev = csdev;
+	/*
+	 * Prefer actual associated CS device dev name to supplied value -
+	 * which is likely to be node name / other conn name.
+	 */
+	if (csdev)
+		tc->con_dev_name = dev_name(&csdev->dev);
+	else if (assoc_dev_name != NULL) {
+		tc->con_dev_name = devm_kstrdup(dev,
+						assoc_dev_name, GFP_KERNEL);
+		if (!tc->con_dev_name)
+			return -ENOMEM;
+	}
+	list_add_tail(&tc->node, &cti_dev->trig_cons);
+	cti_dev->nr_trig_con++;
+
+	/* add connection usage bit info to overall info */
+	drvdata->config.trig_in_use |= tc->con_in->used_mask;
+	drvdata->config.trig_out_use |= tc->con_out->used_mask;
+
+	return 0;
+}
+
+/* create a trigger connection with appropriately sized signal groups */
+struct cti_trig_con *cti_allocate_trig_con(struct device *dev, int in_sigs,
+					   int out_sigs)
+{
+	struct cti_trig_con *tc = NULL;
+	struct cti_trig_grp *in = NULL, *out = NULL;
+
+	tc = devm_kzalloc(dev, sizeof(struct cti_trig_con), GFP_KERNEL);
+	if (!tc)
+		return tc;
+
+	in = devm_kzalloc(dev,
+			  offsetof(struct cti_trig_grp, sig_types[in_sigs]),
+			  GFP_KERNEL);
+	if (!in)
+		return NULL;
+
+	out = devm_kzalloc(dev,
+			   offsetof(struct cti_trig_grp, sig_types[out_sigs]),
+			   GFP_KERNEL);
+	if (!out)
+		return NULL;
+
+	tc->con_in = in;
+	tc->con_out = out;
+	tc->con_in->nr_sigs = in_sigs;
+	tc->con_out->nr_sigs = out_sigs;
+	return tc;
+}
+
+/*
+ * Add a default connection if nothing else is specified.
+ * single connection based on max in/out info, no assoc device
+ */
+int cti_add_default_connection(struct device *dev, struct cti_drvdata *drvdata)
+{
+	int ret = 0;
+	int n_trigs = drvdata->config.nr_trig_max;
+	u32 n_trig_mask = GENMASK(n_trigs - 1, 0);
+	struct cti_trig_con *tc = NULL;
+
+	/*
+	 * Assume max trigs for in and out,
+	 * all used, default sig types allocated
+	 */
+	tc = cti_allocate_trig_con(dev, n_trigs, n_trigs);
+	if (!tc)
+		return -ENOMEM;
+
+	tc->con_in->used_mask = n_trig_mask;
+	tc->con_out->used_mask = n_trig_mask;
+	ret = cti_add_connection_entry(dev, drvdata, tc, NULL, "default");
+	return ret;
+}
+
+/** cti channel api **/
+/* attach/detach channel from trigger - write through if enabled. */
+int cti_channel_trig_op(struct device *dev, enum cti_chan_op op,
+			enum cti_trig_dir direction, u32 channel_idx,
+			u32 trigger_idx)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+	u32 trig_bitmask;
+	u32 chan_bitmask;
+	u32 reg_value;
+	int reg_offset;
+
+	/* ensure indexes in range */
+	if ((channel_idx >= config->nr_ctm_channels) ||
+	   (trigger_idx >= config->nr_trig_max))
+		return -EINVAL;
+
+	trig_bitmask = BIT(trigger_idx);
+
+	/* ensure registered triggers and not out filtered */
+	if (direction == CTI_TRIG_IN)	{
+		if (!(trig_bitmask & config->trig_in_use))
+			return -EINVAL;
+	} else {
+		if (!(trig_bitmask & config->trig_out_use))
+			return -EINVAL;
+
+		if ((config->trig_filter_enable) &&
+		    (config->trig_out_filter & trig_bitmask))
+			return -EINVAL;
+	}
+
+	/* update the local register values */
+	chan_bitmask = BIT(channel_idx);
+	reg_offset = (direction == CTI_TRIG_IN ? CTIINEN(trigger_idx) :
+		      CTIOUTEN(trigger_idx));
+
+	spin_lock(&drvdata->spinlock);
+
+	/* read - modify write - the trigger / channel enable value */
+	reg_value = direction == CTI_TRIG_IN ? config->ctiinen[trigger_idx] :
+		     config->ctiouten[trigger_idx];
+	if (op == CTI_CHAN_ATTACH)
+		reg_value |= chan_bitmask;
+	else
+		reg_value &= ~chan_bitmask;
+
+	/* write local copy */
+	if (direction == CTI_TRIG_IN)
+		config->ctiinen[trigger_idx] = reg_value;
+	else
+		config->ctiouten[trigger_idx] = reg_value;
+
+	/* write through if enabled */
+	if (cti_active(config))
+		cti_write_single_reg(drvdata, reg_offset, reg_value);
+	spin_unlock(&drvdata->spinlock);
+	return 0;
+}
+
+int cti_channel_gate_op(struct device *dev, enum cti_chan_gate_op op,
+			u32 channel_idx)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+	u32 chan_bitmask;
+	u32 reg_value;
+	int err = 0;
+
+	if (channel_idx >= config->nr_ctm_channels)
+		return -EINVAL;
+
+	chan_bitmask = BIT(channel_idx);
+
+	spin_lock(&drvdata->spinlock);
+	reg_value = config->ctigate;
+	switch (op) {
+	case CTI_GATE_CHAN_ENABLE:
+		reg_value |= chan_bitmask;
+		break;
+
+	case CTI_GATE_CHAN_DISABLE:
+		reg_value &= ~chan_bitmask;
+		break;
+
+	default:
+		err = -EINVAL;
+		break;
+	}
+	if (err == 0) {
+		config->ctigate = reg_value;
+		if (cti_active(config))
+			cti_write_single_reg(drvdata, CTIGATE, reg_value);
+	}
+	spin_unlock(&drvdata->spinlock);
+	return err;
+}
+
+int cti_channel_setop(struct device *dev, enum cti_chan_set_op op,
+		      u32 channel_idx)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+	u32 chan_bitmask;
+	u32 reg_value;
+	u32 reg_offset;
+	int err = 0;
+
+	if (channel_idx >= config->nr_ctm_channels)
+		return -EINVAL;
+
+	chan_bitmask = BIT(channel_idx);
+
+	spin_lock(&drvdata->spinlock);
+	reg_value = config->ctiappset;
+	switch (op) {
+	case CTI_CHAN_SET:
+		config->ctiappset |= chan_bitmask;
+		reg_value  = config->ctiappset;
+		reg_offset = CTIAPPSET;
+		break;
+
+	case CTI_CHAN_CLR:
+		config->ctiappset &= ~chan_bitmask;
+		reg_value = chan_bitmask;
+		reg_offset = CTIAPPCLEAR;
+		break;
+
+	case CTI_CHAN_PULSE:
+		config->ctiappset &= ~chan_bitmask;
+		reg_value = chan_bitmask;
+		reg_offset = CTIAPPPULSE;
+		break;
+
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	if ((err == 0) && cti_active(config))
+		cti_write_single_reg(drvdata, reg_offset, reg_value);
+	spin_unlock(&drvdata->spinlock);
+
+	return err;
+}
+
+static bool cti_add_sysfs_link(struct cti_drvdata *drvdata,
+			       struct cti_trig_con *tc)
+{
+	struct coresight_sysfs_link link_info;
+	int link_err = 0;
+
+	link_info.orig = drvdata->csdev;
+	link_info.orig_name = tc->con_dev_name;
+	link_info.target = tc->con_dev;
+	link_info.target_name = dev_name(&drvdata->csdev->dev);
+
+	link_err = coresight_add_sysfs_link(&link_info);
+	if (link_err)
+		dev_warn(&drvdata->csdev->dev,
+			 "Failed to set CTI sysfs link %s<=>%s\n",
+			 link_info.orig_name, link_info.target_name);
+	return !link_err;
+}
+
+static void cti_remove_sysfs_link(struct cti_drvdata *drvdata,
+				  struct cti_trig_con *tc)
+{
+	struct coresight_sysfs_link link_info;
+
+	link_info.orig = drvdata->csdev;
+	link_info.orig_name = tc->con_dev_name;
+	link_info.target = tc->con_dev;
+	link_info.target_name = dev_name(&drvdata->csdev->dev);
+	coresight_remove_sysfs_link(&link_info);
+}
+
+/*
+ * Look for a matching connection device name in the list of connections.
+ * If found then swap in the csdev name, set trig con association pointer
+ * and return found.
+ */
+static bool
+cti_match_fixup_csdev(struct cti_device *ctidev, const char *node_name,
+		      struct coresight_device *csdev)
+{
+	struct cti_trig_con *tc;
+	struct cti_drvdata *drvdata = container_of(ctidev, struct cti_drvdata,
+						   ctidev);
+
+	list_for_each_entry(tc, &ctidev->trig_cons, node) {
+		if (tc->con_dev_name) {
+			if (!strcmp(node_name, tc->con_dev_name)) {
+				/* match: so swap in csdev name & dev */
+				tc->con_dev_name = dev_name(&csdev->dev);
+				tc->con_dev = csdev;
+				/* try to set sysfs link */
+				if (cti_add_sysfs_link(drvdata, tc))
+					return true;
+				/* link failed - remove CTI reference */
+				tc->con_dev = NULL;
+				break;
+			}
+		}
+	}
+	return false;
+}
+
+/*
+ * Search the cti list to add an associated CTI into the supplied CS device
+ * This will set the association if CTI declared before the CS device.
+ * (called from coresight_register() without coresight_mutex locked).
+ */
+static void cti_add_assoc_to_csdev(struct coresight_device *csdev)
+{
+	struct cti_drvdata *ect_item;
+	struct cti_device *ctidev;
+	const char *node_name = NULL;
+
+	/* protect the list */
+	mutex_lock(&ect_mutex);
+
+	/* exit if current is an ECT device.*/
+	if ((csdev->type == CORESIGHT_DEV_TYPE_HELPER &&
+	     csdev->subtype.helper_subtype ==
+		     CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI) ||
+	    list_empty(&ect_net))
+		goto cti_add_done;
+
+	/* if we didn't find the csdev previously we used the fwnode name */
+	node_name = cti_plat_get_node_name(dev_fwnode(csdev->dev.parent));
+	if (!node_name)
+		goto cti_add_done;
+
+	/* for each CTI in list... */
+	list_for_each_entry(ect_item, &ect_net, node) {
+		ctidev = &ect_item->ctidev;
+		if (cti_match_fixup_csdev(ctidev, node_name, csdev)) {
+			/*
+			 * if we found a matching csdev then update the ECT
+			 * association pointer for the device with this CTI.
+			 */
+			coresight_add_helper(csdev, ect_item->csdev);
+			break;
+		}
+	}
+cti_add_done:
+	mutex_unlock(&ect_mutex);
+}
+
+/*
+ * Removing the associated devices is easier.
+ */
+static void cti_remove_assoc_from_csdev(struct coresight_device *csdev)
+{
+	struct cti_drvdata *ctidrv;
+	struct cti_trig_con *tc;
+	struct cti_device *ctidev;
+	union coresight_dev_subtype cti_subtype = {
+		.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI
+	};
+	struct coresight_device *cti_csdev = coresight_find_output_type(
+		csdev->pdata, CORESIGHT_DEV_TYPE_HELPER, cti_subtype);
+
+	if (!cti_csdev)
+		return;
+
+	mutex_lock(&ect_mutex);
+	ctidrv = csdev_to_cti_drvdata(cti_csdev);
+	ctidev = &ctidrv->ctidev;
+	list_for_each_entry(tc, &ctidev->trig_cons, node) {
+		if (tc->con_dev == csdev) {
+			cti_remove_sysfs_link(ctidrv, tc);
+			tc->con_dev = NULL;
+			break;
+		}
+	}
+	mutex_unlock(&ect_mutex);
+}
+
+/*
+ * Operations to add and remove associated CTI.
+ * Register to coresight core driver as call back function.
+ */
+static struct cti_assoc_op cti_assoc_ops = {
+	.add = cti_add_assoc_to_csdev,
+	.remove = cti_remove_assoc_from_csdev
+};
+
+/*
+ * Update the cross references where the associated device was found
+ * while we were building the connection info. This will occur if the
+ * assoc device was registered before the CTI.
+ */
+static void cti_update_conn_xrefs(struct cti_drvdata *drvdata)
+{
+	struct cti_trig_con *tc;
+	struct cti_device *ctidev = &drvdata->ctidev;
+
+	list_for_each_entry(tc, &ctidev->trig_cons, node) {
+		if (tc->con_dev) {
+			/* if we can set the sysfs link */
+			if (cti_add_sysfs_link(drvdata, tc))
+				/* set the CTI/csdev association */
+				coresight_add_helper(tc->con_dev,
+						     drvdata->csdev);
+			else
+				/* otherwise remove reference from CTI */
+				tc->con_dev = NULL;
+		}
+	}
+}
+
+static void cti_remove_conn_xrefs(struct cti_drvdata *drvdata)
+{
+	struct cti_trig_con *tc;
+	struct cti_device *ctidev = &drvdata->ctidev;
+
+	list_for_each_entry(tc, &ctidev->trig_cons, node) {
+		if (tc->con_dev) {
+			cti_remove_sysfs_link(drvdata, tc);
+			tc->con_dev = NULL;
+		}
+	}
+}
+
+/** cti PM callbacks **/
+static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
+			     void *v)
+{
+	struct cti_drvdata *drvdata;
+	struct coresight_device *csdev;
+	unsigned int cpu = smp_processor_id();
+	int notify_res = NOTIFY_OK;
+
+	if (!cti_cpu_drvdata[cpu])
+		return NOTIFY_OK;
+
+	drvdata = cti_cpu_drvdata[cpu];
+	csdev = drvdata->csdev;
+
+	if (WARN_ON_ONCE(drvdata->ctidev.cpu != cpu))
+		return NOTIFY_BAD;
+
+	spin_lock(&drvdata->spinlock);
+
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		/* CTI regs all static - we have a copy & nothing to save */
+		drvdata->config.hw_powered = false;
+		if (drvdata->config.hw_enabled)
+			coresight_disclaim_device(csdev);
+		break;
+
+	case CPU_PM_ENTER_FAILED:
+		drvdata->config.hw_powered = true;
+		if (drvdata->config.hw_enabled) {
+			if (coresight_claim_device(csdev))
+				drvdata->config.hw_enabled = false;
+		}
+		break;
+
+	case CPU_PM_EXIT:
+		/* write hardware registers to re-enable. */
+		drvdata->config.hw_powered = true;
+		drvdata->config.hw_enabled = false;
+
+		/* check enable reference count to enable HW */
+		if (drvdata->config.enable_req_count) {
+			/* check we can claim the device as we re-power */
+			if (coresight_claim_device(csdev))
+				goto cti_notify_exit;
+
+			drvdata->config.hw_enabled = true;
+			cti_write_all_hw_regs(drvdata);
+		}
+		break;
+
+	default:
+		notify_res = NOTIFY_DONE;
+		break;
+	}
+
+cti_notify_exit:
+	spin_unlock(&drvdata->spinlock);
+	return notify_res;
+}
+
+static struct notifier_block cti_cpu_pm_nb = {
+	.notifier_call = cti_cpu_pm_notify,
+};
+
+/* CPU HP handlers */
+static int cti_starting_cpu(unsigned int cpu)
+{
+	struct cti_drvdata *drvdata = cti_cpu_drvdata[cpu];
+
+	if (!drvdata)
+		return 0;
+
+	cti_cpuhp_enable_hw(drvdata);
+	return 0;
+}
+
+static int cti_dying_cpu(unsigned int cpu)
+{
+	struct cti_drvdata *drvdata = cti_cpu_drvdata[cpu];
+
+	if (!drvdata)
+		return 0;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->config.hw_powered = false;
+	if (drvdata->config.hw_enabled)
+		coresight_disclaim_device(drvdata->csdev);
+	spin_unlock(&drvdata->spinlock);
+	return 0;
+}
+
+static int cti_pm_setup(struct cti_drvdata *drvdata)
+{
+	int ret;
+
+	if (drvdata->ctidev.cpu == -1)
+		return 0;
+
+	if (nr_cti_cpu)
+		goto done;
+
+	cpus_read_lock();
+	ret = cpuhp_setup_state_nocalls_cpuslocked(
+			CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
+			"arm/coresight_cti:starting",
+			cti_starting_cpu, cti_dying_cpu);
+	if (ret) {
+		cpus_read_unlock();
+		return ret;
+	}
+
+	ret = cpu_pm_register_notifier(&cti_cpu_pm_nb);
+	cpus_read_unlock();
+	if (ret) {
+		cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_CTI_STARTING);
+		return ret;
+	}
+
+done:
+	nr_cti_cpu++;
+	cti_cpu_drvdata[drvdata->ctidev.cpu] = drvdata;
+
+	return 0;
+}
+
+/* release PM registrations */
+static void cti_pm_release(struct cti_drvdata *drvdata)
+{
+	if (drvdata->ctidev.cpu == -1)
+		return;
+
+	cti_cpu_drvdata[drvdata->ctidev.cpu] = NULL;
+	if (--nr_cti_cpu == 0) {
+		cpu_pm_unregister_notifier(&cti_cpu_pm_nb);
+		cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_CTI_STARTING);
+	}
+}
+
+/** cti ect operations **/
+int cti_enable(struct coresight_device *csdev, enum cs_mode mode, void *data)
+{
+	struct cti_drvdata *drvdata = csdev_to_cti_drvdata(csdev);
+
+	return cti_enable_hw(drvdata);
+}
+
+int cti_disable(struct coresight_device *csdev, void *data)
+{
+	struct cti_drvdata *drvdata = csdev_to_cti_drvdata(csdev);
+
+	return cti_disable_hw(drvdata);
+}
+
+static const struct coresight_ops_helper cti_ops_ect = {
+	.enable = cti_enable,
+	.disable = cti_disable,
+};
+
+static const struct coresight_ops cti_ops = {
+	.helper_ops = &cti_ops_ect,
+};
+
+/*
+ * Free up CTI specific resources
+ * called by dev->release, need to call down to underlying csdev release.
+ */
+static void cti_device_release(struct device *dev)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_drvdata *ect_item, *ect_tmp;
+
+	mutex_lock(&ect_mutex);
+	cti_pm_release(drvdata);
+
+	/* remove from the list */
+	list_for_each_entry_safe(ect_item, ect_tmp, &ect_net, node) {
+		if (ect_item == drvdata) {
+			list_del(&ect_item->node);
+			break;
+		}
+	}
+	mutex_unlock(&ect_mutex);
+
+	if (drvdata->csdev_release)
+		drvdata->csdev_release(dev);
+}
+static void cti_remove(struct amba_device *adev)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	mutex_lock(&ect_mutex);
+	cti_remove_conn_xrefs(drvdata);
+	mutex_unlock(&ect_mutex);
+
+	coresight_unregister(drvdata->csdev);
+}
+
+static int cti_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret = 0;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct cti_drvdata *drvdata = NULL;
+	struct coresight_desc cti_desc;
+	struct coresight_platform_data *pdata = NULL;
+	struct resource *res = &adev->res;
+
+	/* driver data*/
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+	cti_desc.access = CSDEV_ACCESS_IOMEM(base);
+
+	dev_set_drvdata(dev, drvdata);
+
+	/* default CTI device info  */
+	drvdata->ctidev.cpu = -1;
+	drvdata->ctidev.nr_trig_con = 0;
+	drvdata->ctidev.ctm_id = 0;
+	INIT_LIST_HEAD(&drvdata->ctidev.trig_cons);
+
+	spin_lock_init(&drvdata->spinlock);
+
+	/* initialise CTI driver config values */
+	cti_set_default_config(dev, drvdata);
+
+	pdata = coresight_cti_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		dev_err(dev, "coresight_cti_get_platform_data err\n");
+		return  PTR_ERR(pdata);
+	}
+
+	/* default to powered - could change on PM notifications */
+	drvdata->config.hw_powered = true;
+
+	/* set up device name - will depend if cpu bound or otherwise */
+	if (drvdata->ctidev.cpu >= 0)
+		cti_desc.name = devm_kasprintf(dev, GFP_KERNEL, "cti_cpu%d",
+					       drvdata->ctidev.cpu);
+	else
+		cti_desc.name = coresight_alloc_device_name(&cti_sys_devs, dev);
+	if (!cti_desc.name)
+		return -ENOMEM;
+
+	/* setup CPU power management handling for CPU bound CTI devices. */
+	ret = cti_pm_setup(drvdata);
+	if (ret)
+		return ret;
+
+	/* create dynamic attributes for connections */
+	ret = cti_create_cons_sysfs(dev, drvdata);
+	if (ret) {
+		dev_err(dev, "%s: create dynamic sysfs entries failed\n",
+			cti_desc.name);
+		goto pm_release;
+	}
+
+	/* set up coresight component description */
+	cti_desc.pdata = pdata;
+	cti_desc.type = CORESIGHT_DEV_TYPE_HELPER;
+	cti_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI;
+	cti_desc.ops = &cti_ops;
+	cti_desc.groups = drvdata->ctidev.con_groups;
+	cti_desc.dev = dev;
+	drvdata->csdev = coresight_register(&cti_desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto pm_release;
+	}
+
+	/* add to list of CTI devices */
+	mutex_lock(&ect_mutex);
+	list_add(&drvdata->node, &ect_net);
+	/* set any cross references */
+	cti_update_conn_xrefs(drvdata);
+	mutex_unlock(&ect_mutex);
+
+	/* set up release chain */
+	drvdata->csdev_release = drvdata->csdev->dev.release;
+	drvdata->csdev->dev.release = cti_device_release;
+
+	/* all done - dec pm refcount */
+	pm_runtime_put(&adev->dev);
+	dev_info(&drvdata->csdev->dev, "CTI initialized\n");
+	return 0;
+
+pm_release:
+	cti_pm_release(drvdata);
+	return ret;
+}
+
+static struct amba_cs_uci_id uci_id_cti[] = {
+	{
+		/*  CTI UCI data */
+		.devarch	= 0x47701a14, /* CTI v2 */
+		.devarch_mask	= 0xfff0ffff,
+		.devtype	= 0x00000014, /* maj(0x4-debug) min(0x1-ECT) */
+	}
+};
+
+static const struct amba_id cti_ids[] = {
+	CS_AMBA_ID(0x000bb906), /* Coresight CTI (SoC 400), C-A72, C-A57 */
+	CS_AMBA_ID(0x000bb922), /* CTI - C-A8 */
+	CS_AMBA_ID(0x000bb9a8), /* CTI - C-A53 */
+	CS_AMBA_ID(0x000bb9aa), /* CTI - C-A73 */
+	CS_AMBA_UCI_ID(0x000bb9da, uci_id_cti), /* CTI - C-A35 */
+	CS_AMBA_UCI_ID(0x000bb9ed, uci_id_cti), /* Coresight CTI (SoC 600) */
+	{ 0, 0},
+};
+
+MODULE_DEVICE_TABLE(amba, cti_ids);
+
+static struct amba_driver cti_driver = {
+	.drv = {
+		.name	= "coresight-cti",
+		.owner = THIS_MODULE,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= cti_probe,
+	.remove		= cti_remove,
+	.id_table	= cti_ids,
+};
+
+static int __init cti_init(void)
+{
+	int ret;
+
+	ret = amba_driver_register(&cti_driver);
+	if (ret)
+		pr_info("Error registering cti driver\n");
+	coresight_set_cti_ops(&cti_assoc_ops);
+	return ret;
+}
+
+static void __exit cti_exit(void)
+{
+	coresight_remove_cti_ops();
+	amba_driver_unregister(&cti_driver);
+}
+
+module_init(cti_init);
+module_exit(cti_exit);
+
+MODULE_AUTHOR("Mike Leach <mike.leach@linaro.org>");
+MODULE_DESCRIPTION("Arm CoreSight CTI Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-cti-platform.c b/drivers/hwtracing/coresight/coresight-cti-platform.c
new file mode 100644
index 0000000000..ccef04f27f
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cti-platform.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019, The Linaro Limited. All rights reserved.
+ */
+#include <linux/coresight.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/arm/coresight-cti-dt.h>
+
+#include "coresight-cti.h"
+#include "coresight-priv.h"
+
+/* Number of CTI signals in the v8 architecturally defined connection */
+#define NR_V8PE_IN_SIGS		2
+#define NR_V8PE_OUT_SIGS	3
+#define NR_V8ETM_INOUT_SIGS	4
+
+/* CTI device tree trigger connection node keyword */
+#define CTI_DT_CONNS		"trig-conns"
+
+/* CTI device tree connection property keywords */
+#define CTI_DT_V8ARCH_COMPAT	"arm,coresight-cti-v8-arch"
+#define CTI_DT_CSDEV_ASSOC	"arm,cs-dev-assoc"
+#define CTI_DT_TRIGIN_SIGS	"arm,trig-in-sigs"
+#define CTI_DT_TRIGOUT_SIGS	"arm,trig-out-sigs"
+#define CTI_DT_TRIGIN_TYPES	"arm,trig-in-types"
+#define CTI_DT_TRIGOUT_TYPES	"arm,trig-out-types"
+#define CTI_DT_FILTER_OUT_SIGS	"arm,trig-filters"
+#define CTI_DT_CONN_NAME	"arm,trig-conn-name"
+#define CTI_DT_CTM_ID		"arm,cti-ctm-id"
+
+#ifdef CONFIG_OF
+/*
+ * CTI can be bound to a CPU, or a system device.
+ * CPU can be declared at the device top level or in a connections node
+ * so need to check relative to node not device.
+ */
+static int of_cti_get_cpu_at_node(const struct device_node *node)
+{
+	int cpu;
+	struct device_node *dn;
+
+	if (node == NULL)
+		return -1;
+
+	dn = of_parse_phandle(node, "cpu", 0);
+	/* CTI affinity defaults to no cpu */
+	if (!dn)
+		return -1;
+	cpu = of_cpu_node_to_id(dn);
+	of_node_put(dn);
+
+	/* No Affinity  if no cpu nodes are found */
+	return (cpu < 0) ? -1 : cpu;
+}
+
+#else
+static int of_cti_get_cpu_at_node(const struct device_node *node)
+{
+	return -1;
+}
+
+#endif
+
+/*
+ * CTI can be bound to a CPU, or a system device.
+ * CPU can be declared at the device top level or in a connections node
+ * so need to check relative to node not device.
+ */
+static int cti_plat_get_cpu_at_node(struct fwnode_handle *fwnode)
+{
+	if (is_of_node(fwnode))
+		return of_cti_get_cpu_at_node(to_of_node(fwnode));
+	return -1;
+}
+
+const char *cti_plat_get_node_name(struct fwnode_handle *fwnode)
+{
+	if (is_of_node(fwnode))
+		return of_node_full_name(to_of_node(fwnode));
+	return "unknown";
+}
+
+/*
+ * Extract a name from the fwnode.
+ * If the device associated with the node is a coresight_device, then return
+ * that name and the coresight_device pointer, otherwise return the node name.
+ */
+static const char *
+cti_plat_get_csdev_or_node_name(struct fwnode_handle *fwnode,
+				struct coresight_device **csdev)
+{
+	const char *name = NULL;
+	*csdev = coresight_find_csdev_by_fwnode(fwnode);
+	if (*csdev)
+		name = dev_name(&(*csdev)->dev);
+	else
+		name = cti_plat_get_node_name(fwnode);
+	return name;
+}
+
+static bool cti_plat_node_name_eq(struct fwnode_handle *fwnode,
+				  const char *name)
+{
+	if (is_of_node(fwnode))
+		return of_node_name_eq(to_of_node(fwnode), name);
+	return false;
+}
+
+static int cti_plat_create_v8_etm_connection(struct device *dev,
+					     struct cti_drvdata *drvdata)
+{
+	int ret = -ENOMEM, i;
+	struct fwnode_handle *root_fwnode, *cs_fwnode;
+	const char *assoc_name = NULL;
+	struct coresight_device *csdev;
+	struct cti_trig_con *tc = NULL;
+
+	root_fwnode = dev_fwnode(dev);
+	if (IS_ERR_OR_NULL(root_fwnode))
+		return -EINVAL;
+
+	/* Can optionally have an etm node - return if not  */
+	cs_fwnode = fwnode_find_reference(root_fwnode, CTI_DT_CSDEV_ASSOC, 0);
+	if (IS_ERR(cs_fwnode))
+		return 0;
+
+	/* allocate memory */
+	tc = cti_allocate_trig_con(dev, NR_V8ETM_INOUT_SIGS,
+				   NR_V8ETM_INOUT_SIGS);
+	if (!tc)
+		goto create_v8_etm_out;
+
+	/* build connection data */
+	tc->con_in->used_mask = 0xF0; /* sigs <4,5,6,7> */
+	tc->con_out->used_mask = 0xF0; /* sigs <4,5,6,7> */
+
+	/*
+	 * The EXTOUT type signals from the ETM are connected to a set of input
+	 * triggers on the CTI, the EXTIN being connected to output triggers.
+	 */
+	for (i = 0; i < NR_V8ETM_INOUT_SIGS; i++) {
+		tc->con_in->sig_types[i] = ETM_EXTOUT;
+		tc->con_out->sig_types[i] = ETM_EXTIN;
+	}
+
+	/*
+	 * We look to see if the ETM coresight device associated with this
+	 * handle has been registered with the system - i.e. probed before
+	 * this CTI. If so csdev will be non NULL and we can use the device
+	 * name and pass the csdev to the connection entry function where
+	 * the association will be recorded.
+	 * If not, then simply record the name in the connection data, the
+	 * probing of the ETM will call into the CTI driver API to update the
+	 * association then.
+	 */
+	assoc_name = cti_plat_get_csdev_or_node_name(cs_fwnode, &csdev);
+	ret = cti_add_connection_entry(dev, drvdata, tc, csdev, assoc_name);
+
+create_v8_etm_out:
+	fwnode_handle_put(cs_fwnode);
+	return ret;
+}
+
+/*
+ * Create an architecturally defined v8 connection
+ * must have a cpu, can have an ETM.
+ */
+static int cti_plat_create_v8_connections(struct device *dev,
+					  struct cti_drvdata *drvdata)
+{
+	struct cti_device *cti_dev = &drvdata->ctidev;
+	struct cti_trig_con *tc = NULL;
+	int cpuid = 0;
+	char cpu_name_str[16];
+	int ret = -ENOMEM;
+
+	/* Must have a cpu node */
+	cpuid = cti_plat_get_cpu_at_node(dev_fwnode(dev));
+	if (cpuid < 0) {
+		dev_warn(dev,
+			 "ARM v8 architectural CTI connection: missing cpu\n");
+		return -EINVAL;
+	}
+	cti_dev->cpu = cpuid;
+
+	/* Allocate the v8 cpu connection memory */
+	tc = cti_allocate_trig_con(dev, NR_V8PE_IN_SIGS, NR_V8PE_OUT_SIGS);
+	if (!tc)
+		goto of_create_v8_out;
+
+	/* Set the v8 PE CTI connection data */
+	tc->con_in->used_mask = 0x3; /* sigs <0 1> */
+	tc->con_in->sig_types[0] = PE_DBGTRIGGER;
+	tc->con_in->sig_types[1] = PE_PMUIRQ;
+	tc->con_out->used_mask = 0x7; /* sigs <0 1 2 > */
+	tc->con_out->sig_types[0] = PE_EDBGREQ;
+	tc->con_out->sig_types[1] = PE_DBGRESTART;
+	tc->con_out->sig_types[2] = PE_CTIIRQ;
+	scnprintf(cpu_name_str, sizeof(cpu_name_str), "cpu%d", cpuid);
+
+	ret = cti_add_connection_entry(dev, drvdata, tc, NULL, cpu_name_str);
+	if (ret)
+		goto of_create_v8_out;
+
+	/* Create the v8 ETM associated connection */
+	ret = cti_plat_create_v8_etm_connection(dev, drvdata);
+	if (ret)
+		goto of_create_v8_out;
+
+	/* filter pe_edbgreq - PE trigout sig <0> */
+	drvdata->config.trig_out_filter |= 0x1;
+
+of_create_v8_out:
+	return ret;
+}
+
+static int cti_plat_check_v8_arch_compatible(struct device *dev)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+
+	if (is_of_node(fwnode))
+		return of_device_is_compatible(to_of_node(fwnode),
+					       CTI_DT_V8ARCH_COMPAT);
+	return 0;
+}
+
+static int cti_plat_count_sig_elements(const struct fwnode_handle *fwnode,
+				       const char *name)
+{
+	int nr_elem = fwnode_property_count_u32(fwnode, name);
+
+	return (nr_elem < 0 ? 0 : nr_elem);
+}
+
+static int cti_plat_read_trig_group(struct cti_trig_grp *tgrp,
+				    const struct fwnode_handle *fwnode,
+				    const char *grp_name)
+{
+	int idx, err = 0;
+	u32 *values;
+
+	if (!tgrp->nr_sigs)
+		return 0;
+
+	values = kcalloc(tgrp->nr_sigs, sizeof(u32), GFP_KERNEL);
+	if (!values)
+		return -ENOMEM;
+
+	err = fwnode_property_read_u32_array(fwnode, grp_name,
+					     values, tgrp->nr_sigs);
+
+	if (!err) {
+		/* set the signal usage mask */
+		for (idx = 0; idx < tgrp->nr_sigs; idx++)
+			tgrp->used_mask |= BIT(values[idx]);
+	}
+
+	kfree(values);
+	return err;
+}
+
+static int cti_plat_read_trig_types(struct cti_trig_grp *tgrp,
+				    const struct fwnode_handle *fwnode,
+				    const char *type_name)
+{
+	int items, err = 0, nr_sigs;
+	u32 *values = NULL, i;
+
+	/* allocate an array according to number of signals in connection */
+	nr_sigs = tgrp->nr_sigs;
+	if (!nr_sigs)
+		return 0;
+
+	/* see if any types have been included in the device description */
+	items = cti_plat_count_sig_elements(fwnode, type_name);
+	if (items > nr_sigs)
+		return -EINVAL;
+
+	/* need an array to store the values iff there are any */
+	if (items) {
+		values = kcalloc(items, sizeof(u32), GFP_KERNEL);
+		if (!values)
+			return -ENOMEM;
+
+		err = fwnode_property_read_u32_array(fwnode, type_name,
+						     values, items);
+		if (err)
+			goto read_trig_types_out;
+	}
+
+	/*
+	 * Match type id to signal index, 1st type to 1st index etc.
+	 * If fewer types than signals default remainder to GEN_IO.
+	 */
+	for (i = 0; i < nr_sigs; i++) {
+		if (i < items) {
+			tgrp->sig_types[i] =
+				values[i] < CTI_TRIG_MAX ? values[i] : GEN_IO;
+		} else {
+			tgrp->sig_types[i] = GEN_IO;
+		}
+	}
+
+read_trig_types_out:
+	kfree(values);
+	return err;
+}
+
+static int cti_plat_process_filter_sigs(struct cti_drvdata *drvdata,
+					const struct fwnode_handle *fwnode)
+{
+	struct cti_trig_grp *tg = NULL;
+	int err = 0, nr_filter_sigs;
+
+	nr_filter_sigs = cti_plat_count_sig_elements(fwnode,
+						     CTI_DT_FILTER_OUT_SIGS);
+	if (nr_filter_sigs == 0)
+		return 0;
+
+	if (nr_filter_sigs > drvdata->config.nr_trig_max)
+		return -EINVAL;
+
+	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+	if (!tg)
+		return -ENOMEM;
+
+	err = cti_plat_read_trig_group(tg, fwnode, CTI_DT_FILTER_OUT_SIGS);
+	if (!err)
+		drvdata->config.trig_out_filter |= tg->used_mask;
+
+	kfree(tg);
+	return err;
+}
+
+static int cti_plat_create_connection(struct device *dev,
+				      struct cti_drvdata *drvdata,
+				      struct fwnode_handle *fwnode)
+{
+	struct cti_trig_con *tc = NULL;
+	int cpuid = -1, err = 0;
+	struct coresight_device *csdev = NULL;
+	const char *assoc_name = "unknown";
+	char cpu_name_str[16];
+	int nr_sigs_in, nr_sigs_out;
+
+	/* look to see how many in and out signals we have */
+	nr_sigs_in = cti_plat_count_sig_elements(fwnode, CTI_DT_TRIGIN_SIGS);
+	nr_sigs_out = cti_plat_count_sig_elements(fwnode, CTI_DT_TRIGOUT_SIGS);
+
+	if ((nr_sigs_in > drvdata->config.nr_trig_max) ||
+	    (nr_sigs_out > drvdata->config.nr_trig_max))
+		return -EINVAL;
+
+	tc = cti_allocate_trig_con(dev, nr_sigs_in, nr_sigs_out);
+	if (!tc)
+		return -ENOMEM;
+
+	/* look for the signals properties. */
+	err = cti_plat_read_trig_group(tc->con_in, fwnode,
+				       CTI_DT_TRIGIN_SIGS);
+	if (err)
+		goto create_con_err;
+
+	err = cti_plat_read_trig_types(tc->con_in, fwnode,
+				       CTI_DT_TRIGIN_TYPES);
+	if (err)
+		goto create_con_err;
+
+	err = cti_plat_read_trig_group(tc->con_out, fwnode,
+				       CTI_DT_TRIGOUT_SIGS);
+	if (err)
+		goto create_con_err;
+
+	err = cti_plat_read_trig_types(tc->con_out, fwnode,
+				       CTI_DT_TRIGOUT_TYPES);
+	if (err)
+		goto create_con_err;
+
+	err = cti_plat_process_filter_sigs(drvdata, fwnode);
+	if (err)
+		goto create_con_err;
+
+	/* read the connection name if set - may be overridden by later */
+	fwnode_property_read_string(fwnode, CTI_DT_CONN_NAME, &assoc_name);
+
+	/* associated cpu ? */
+	cpuid = cti_plat_get_cpu_at_node(fwnode);
+	if (cpuid >= 0) {
+		drvdata->ctidev.cpu = cpuid;
+		scnprintf(cpu_name_str, sizeof(cpu_name_str), "cpu%d", cpuid);
+		assoc_name = cpu_name_str;
+	} else {
+		/* associated device ? */
+		struct fwnode_handle *cs_fwnode = fwnode_find_reference(fwnode,
+									CTI_DT_CSDEV_ASSOC,
+									0);
+		if (!IS_ERR(cs_fwnode)) {
+			assoc_name = cti_plat_get_csdev_or_node_name(cs_fwnode,
+								     &csdev);
+			fwnode_handle_put(cs_fwnode);
+		}
+	}
+	/* set up a connection */
+	err = cti_add_connection_entry(dev, drvdata, tc, csdev, assoc_name);
+
+create_con_err:
+	return err;
+}
+
+static int cti_plat_create_impdef_connections(struct device *dev,
+					      struct cti_drvdata *drvdata)
+{
+	int rc = 0;
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+	struct fwnode_handle *child = NULL;
+
+	if (IS_ERR_OR_NULL(fwnode))
+		return -EINVAL;
+
+	fwnode_for_each_child_node(fwnode, child) {
+		if (cti_plat_node_name_eq(child, CTI_DT_CONNS))
+			rc = cti_plat_create_connection(dev, drvdata,
+							child);
+		if (rc != 0)
+			break;
+	}
+	fwnode_handle_put(child);
+
+	return rc;
+}
+
+/* get the hardware configuration & connection data. */
+static int cti_plat_get_hw_data(struct device *dev, struct cti_drvdata *drvdata)
+{
+	int rc = 0;
+	struct cti_device *cti_dev = &drvdata->ctidev;
+
+	/* get any CTM ID - defaults to 0 */
+	device_property_read_u32(dev, CTI_DT_CTM_ID, &cti_dev->ctm_id);
+
+	/* check for a v8 architectural CTI device */
+	if (cti_plat_check_v8_arch_compatible(dev))
+		rc = cti_plat_create_v8_connections(dev, drvdata);
+	else
+		rc = cti_plat_create_impdef_connections(dev, drvdata);
+	if (rc)
+		return rc;
+
+	/* if no connections, just add a single default based on max IN-OUT */
+	if (cti_dev->nr_trig_con == 0)
+		rc = cti_add_default_connection(dev, drvdata);
+	return rc;
+}
+
+struct coresight_platform_data *
+coresight_cti_get_platform_data(struct device *dev)
+{
+	int ret = -ENOENT;
+	struct coresight_platform_data *pdata = NULL;
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (IS_ERR_OR_NULL(fwnode))
+		goto error;
+
+	/*
+	 * Alloc platform data but leave it zero init. CTI does not use the
+	 * same connection infrastructuree as trace path components but an
+	 * empty struct enables us to use the standard coresight component
+	 * registration code.
+	 */
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* get some CTI specifics */
+	ret = cti_plat_get_hw_data(dev, drvdata);
+
+	if (!ret)
+		return pdata;
+error:
+	return ERR_PTR(ret);
+}
diff --git a/drivers/hwtracing/coresight/coresight-cti-sysfs.c b/drivers/hwtracing/coresight/coresight-cti-sysfs.c
new file mode 100644
index 0000000000..d25dd2737b
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cti-sysfs.c
@@ -0,0 +1,1187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/atomic.h>
+#include <linux/coresight.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+
+#include "coresight-cti.h"
+
+/*
+ * Declare the number of static declared attribute groups
+ * Value includes groups + NULL value at end of table.
+ */
+#define CORESIGHT_CTI_STATIC_GROUPS_MAX 5
+
+/*
+ * List of trigger signal type names. Match the constants declared in
+ * include\dt-bindings\arm\coresight-cti-dt.h
+ */
+static const char * const sig_type_names[] = {
+	"genio",	/* GEN_IO */
+	"intreq",	/* GEN_INTREQ */
+	"intack",	/* GEN_INTACK */
+	"haltreq",	/* GEN_HALTREQ */
+	"restartreq",	/* GEN_RESTARTREQ */
+	"pe_edbgreq",	/* PE_EDBGREQ */
+	"pe_dbgrestart",/* PE_DBGRESTART */
+	"pe_ctiirq",	/* PE_CTIIRQ */
+	"pe_pmuirq",	/* PE_PMUIRQ */
+	"pe_dbgtrigger",/* PE_DBGTRIGGER */
+	"etm_extout",	/* ETM_EXTOUT */
+	"etm_extin",	/* ETM_EXTIN */
+	"snk_full",	/* SNK_FULL */
+	"snk_acqcomp",	/* SNK_ACQCOMP */
+	"snk_flushcomp",/* SNK_FLUSHCOMP */
+	"snk_flushin",	/* SNK_FLUSHIN */
+	"snk_trigin",	/* SNK_TRIGIN */
+	"stm_asyncout",	/* STM_ASYNCOUT */
+	"stm_tout_spte",/* STM_TOUT_SPTE */
+	"stm_tout_sw",	/* STM_TOUT_SW */
+	"stm_tout_hete",/* STM_TOUT_HETE */
+	"stm_hwevent",	/* STM_HWEVENT */
+	"ela_tstart",	/* ELA_TSTART */
+	"ela_tstop",	/* ELA_TSTOP */
+	"ela_dbgreq",	/* ELA_DBGREQ */
+};
+
+/* Show function pointer used in the connections dynamic declared attributes*/
+typedef ssize_t (*p_show_fn)(struct device *dev, struct device_attribute *attr,
+			     char *buf);
+
+/* Connection attribute types */
+enum cti_conn_attr_type {
+	CTI_CON_ATTR_NAME,
+	CTI_CON_ATTR_TRIGIN_SIG,
+	CTI_CON_ATTR_TRIGOUT_SIG,
+	CTI_CON_ATTR_TRIGIN_TYPES,
+	CTI_CON_ATTR_TRIGOUT_TYPES,
+	CTI_CON_ATTR_MAX,
+};
+
+/* Names for the connection attributes */
+static const char * const con_attr_names[CTI_CON_ATTR_MAX] = {
+	"name",
+	"in_signals",
+	"out_signals",
+	"in_types",
+	"out_types",
+};
+
+/* basic attributes */
+static ssize_t enable_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	int enable_req;
+	bool enabled, powered;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	enable_req = drvdata->config.enable_req_count;
+	powered = drvdata->config.hw_powered;
+	enabled = drvdata->config.hw_enabled;
+	spin_unlock(&drvdata->spinlock);
+
+	if (powered)
+		return sprintf(buf, "%d\n", enabled);
+	else
+		return sprintf(buf, "%d\n", !!enable_req);
+}
+
+static ssize_t enable_store(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t size)
+{
+	int ret = 0;
+	unsigned long val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		ret = pm_runtime_resume_and_get(dev->parent);
+		if (ret)
+			return ret;
+		ret = cti_enable(drvdata->csdev, CS_MODE_SYSFS, NULL);
+		if (ret)
+			pm_runtime_put(dev->parent);
+	} else {
+		ret = cti_disable(drvdata->csdev, NULL);
+		if (!ret)
+			pm_runtime_put(dev->parent);
+	}
+
+	if (ret)
+		return ret;
+	return size;
+}
+static DEVICE_ATTR_RW(enable);
+
+static ssize_t powered_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	bool powered;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	powered = drvdata->config.hw_powered;
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%d\n", powered);
+}
+static DEVICE_ATTR_RO(powered);
+
+static ssize_t ctmid_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	return sprintf(buf, "%d\n", drvdata->ctidev.ctm_id);
+}
+static DEVICE_ATTR_RO(ctmid);
+
+static ssize_t nr_trigger_cons_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	return sprintf(buf, "%d\n", drvdata->ctidev.nr_trig_con);
+}
+static DEVICE_ATTR_RO(nr_trigger_cons);
+
+/* attribute and group sysfs tables. */
+static struct attribute *coresight_cti_attrs[] = {
+	&dev_attr_enable.attr,
+	&dev_attr_powered.attr,
+	&dev_attr_ctmid.attr,
+	&dev_attr_nr_trigger_cons.attr,
+	NULL,
+};
+
+/* register based attributes */
+
+/* Read registers with power check only (no enable check). */
+static ssize_t coresight_cti_reg_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cs_off_attribute *cti_attr = container_of(attr, struct cs_off_attribute, attr);
+	u32 val = 0;
+
+	pm_runtime_get_sync(dev->parent);
+	spin_lock(&drvdata->spinlock);
+	if (drvdata->config.hw_powered)
+		val = readl_relaxed(drvdata->base + cti_attr->off);
+	spin_unlock(&drvdata->spinlock);
+	pm_runtime_put_sync(dev->parent);
+	return sysfs_emit(buf, "0x%x\n", val);
+}
+
+/* Write registers with power check only (no enable check). */
+static __maybe_unused ssize_t coresight_cti_reg_store(struct device *dev,
+						      struct device_attribute *attr,
+						      const char *buf, size_t size)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cs_off_attribute *cti_attr = container_of(attr, struct cs_off_attribute, attr);
+	unsigned long val = 0;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	pm_runtime_get_sync(dev->parent);
+	spin_lock(&drvdata->spinlock);
+	if (drvdata->config.hw_powered)
+		cti_write_single_reg(drvdata, cti_attr->off, val);
+	spin_unlock(&drvdata->spinlock);
+	pm_runtime_put_sync(dev->parent);
+	return size;
+}
+
+#define coresight_cti_reg(name, offset)					\
+	(&((struct cs_off_attribute[]) {				\
+	   {								\
+		__ATTR(name, 0444, coresight_cti_reg_show, NULL),	\
+		offset							\
+	   }								\
+	})[0].attr.attr)
+
+#define coresight_cti_reg_rw(name, offset)				\
+	(&((struct cs_off_attribute[]) {				\
+	   {								\
+		__ATTR(name, 0644, coresight_cti_reg_show,		\
+		       coresight_cti_reg_store),			\
+		offset							\
+	   }								\
+	})[0].attr.attr)
+
+#define coresight_cti_reg_wo(name, offset)				\
+	(&((struct cs_off_attribute[]) {				\
+	   {								\
+		__ATTR(name, 0200, NULL, coresight_cti_reg_store),	\
+		offset							\
+	   }								\
+	})[0].attr.attr)
+
+/* coresight management registers */
+static struct attribute *coresight_cti_mgmt_attrs[] = {
+	coresight_cti_reg(devaff0, CTIDEVAFF0),
+	coresight_cti_reg(devaff1, CTIDEVAFF1),
+	coresight_cti_reg(authstatus, CORESIGHT_AUTHSTATUS),
+	coresight_cti_reg(devarch, CORESIGHT_DEVARCH),
+	coresight_cti_reg(devid, CORESIGHT_DEVID),
+	coresight_cti_reg(devtype, CORESIGHT_DEVTYPE),
+	coresight_cti_reg(pidr0, CORESIGHT_PERIPHIDR0),
+	coresight_cti_reg(pidr1, CORESIGHT_PERIPHIDR1),
+	coresight_cti_reg(pidr2, CORESIGHT_PERIPHIDR2),
+	coresight_cti_reg(pidr3, CORESIGHT_PERIPHIDR3),
+	coresight_cti_reg(pidr4, CORESIGHT_PERIPHIDR4),
+	NULL,
+};
+
+/* CTI low level programming registers */
+
+/*
+ * Show a simple 32 bit value if enabled and powered.
+ * If inaccessible & pcached_val not NULL then show cached value.
+ */
+static ssize_t cti_reg32_show(struct device *dev, char *buf,
+			      u32 *pcached_val, int reg_offset)
+{
+	u32 val = 0;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	if ((reg_offset >= 0) && cti_active(config)) {
+		CS_UNLOCK(drvdata->base);
+		val = readl_relaxed(drvdata->base + reg_offset);
+		if (pcached_val)
+			*pcached_val = val;
+		CS_LOCK(drvdata->base);
+	} else if (pcached_val) {
+		val = *pcached_val;
+	}
+	spin_unlock(&drvdata->spinlock);
+	return sprintf(buf, "%#x\n", val);
+}
+
+/*
+ * Store a simple 32 bit value.
+ * If pcached_val not NULL, then copy to here too,
+ * if reg_offset >= 0 then write through if enabled.
+ */
+static ssize_t cti_reg32_store(struct device *dev, const char *buf,
+			       size_t size, u32 *pcached_val, int reg_offset)
+{
+	unsigned long val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/* local store */
+	if (pcached_val)
+		*pcached_val = (u32)val;
+
+	/* write through if offset and enabled */
+	if ((reg_offset >= 0) && cti_active(config))
+		cti_write_single_reg(drvdata, reg_offset, val);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+
+/* Standard macro for simple rw cti config registers */
+#define cti_config_reg32_rw(name, cfgname, offset)			\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr,		\
+			   char *buf)					\
+{									\
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);	\
+	return cti_reg32_show(dev, buf,					\
+			      &drvdata->config.cfgname, offset);	\
+}									\
+									\
+static ssize_t name##_store(struct device *dev,				\
+			    struct device_attribute *attr,		\
+			    const char *buf, size_t size)		\
+{									\
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);	\
+	return cti_reg32_store(dev, buf, size,				\
+			       &drvdata->config.cfgname, offset);	\
+}									\
+static DEVICE_ATTR_RW(name)
+
+static ssize_t inout_sel_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u32 val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = (u32)drvdata->config.ctiinout_sel;
+	return sprintf(buf, "%d\n", val);
+}
+
+static ssize_t inout_sel_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	unsigned long val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+	if (val > (CTIINOUTEN_MAX - 1))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->config.ctiinout_sel = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(inout_sel);
+
+static ssize_t inen_show(struct device *dev,
+			 struct device_attribute *attr,
+			 char *buf)
+{
+	unsigned long val;
+	int index;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	index = drvdata->config.ctiinout_sel;
+	val = drvdata->config.ctiinen[index];
+	spin_unlock(&drvdata->spinlock);
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t inen_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	unsigned long val;
+	int index;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	index = config->ctiinout_sel;
+	config->ctiinen[index] = val;
+
+	/* write through if enabled */
+	if (cti_active(config))
+		cti_write_single_reg(drvdata, CTIINEN(index), val);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(inen);
+
+static ssize_t outen_show(struct device *dev,
+			  struct device_attribute *attr,
+			  char *buf)
+{
+	unsigned long val;
+	int index;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	index = drvdata->config.ctiinout_sel;
+	val = drvdata->config.ctiouten[index];
+	spin_unlock(&drvdata->spinlock);
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t outen_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	unsigned long val;
+	int index;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	index = config->ctiinout_sel;
+	config->ctiouten[index] = val;
+
+	/* write through if enabled */
+	if (cti_active(config))
+		cti_write_single_reg(drvdata, CTIOUTEN(index), val);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(outen);
+
+static ssize_t intack_store(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t size)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	cti_write_intack(dev, val);
+	return size;
+}
+static DEVICE_ATTR_WO(intack);
+
+cti_config_reg32_rw(gate, ctigate, CTIGATE);
+cti_config_reg32_rw(asicctl, asicctl, ASICCTL);
+cti_config_reg32_rw(appset, ctiappset, CTIAPPSET);
+
+static ssize_t appclear_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+
+	/* a 1'b1 in appclr clears down the same bit in appset*/
+	config->ctiappset &= ~val;
+
+	/* write through if enabled */
+	if (cti_active(config))
+		cti_write_single_reg(drvdata, CTIAPPCLEAR, val);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_WO(appclear);
+
+static ssize_t apppulse_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+
+	/* write through if enabled */
+	if (cti_active(config))
+		cti_write_single_reg(drvdata, CTIAPPPULSE, val);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_WO(apppulse);
+
+/*
+ * Define CONFIG_CORESIGHT_CTI_INTEGRATION_REGS to enable the access to the
+ * integration control registers. Normally only used to investigate connection
+ * data.
+ */
+static struct attribute *coresight_cti_regs_attrs[] = {
+	&dev_attr_inout_sel.attr,
+	&dev_attr_inen.attr,
+	&dev_attr_outen.attr,
+	&dev_attr_gate.attr,
+	&dev_attr_asicctl.attr,
+	&dev_attr_intack.attr,
+	&dev_attr_appset.attr,
+	&dev_attr_appclear.attr,
+	&dev_attr_apppulse.attr,
+	coresight_cti_reg(triginstatus, CTITRIGINSTATUS),
+	coresight_cti_reg(trigoutstatus, CTITRIGOUTSTATUS),
+	coresight_cti_reg(chinstatus, CTICHINSTATUS),
+	coresight_cti_reg(choutstatus, CTICHOUTSTATUS),
+#ifdef CONFIG_CORESIGHT_CTI_INTEGRATION_REGS
+	coresight_cti_reg_rw(itctrl, CORESIGHT_ITCTRL),
+	coresight_cti_reg(ittrigin, ITTRIGIN),
+	coresight_cti_reg(itchin, ITCHIN),
+	coresight_cti_reg_rw(ittrigout, ITTRIGOUT),
+	coresight_cti_reg_rw(itchout, ITCHOUT),
+	coresight_cti_reg(itchoutack, ITCHOUTACK),
+	coresight_cti_reg(ittrigoutack, ITTRIGOUTACK),
+	coresight_cti_reg_wo(ittriginack, ITTRIGINACK),
+	coresight_cti_reg_wo(itchinack, ITCHINACK),
+#endif
+	NULL,
+};
+
+/* CTI channel x-trigger programming */
+static int
+cti_trig_op_parse(struct device *dev, enum cti_chan_op op,
+		  enum cti_trig_dir dir, const char *buf, size_t size)
+{
+	u32 chan_idx;
+	u32 trig_idx;
+	int items, err = -EINVAL;
+
+	/* extract chan idx and trigger idx */
+	items = sscanf(buf, "%d %d", &chan_idx, &trig_idx);
+	if (items == 2) {
+		err = cti_channel_trig_op(dev, op, dir, chan_idx, trig_idx);
+		if (!err)
+			err = size;
+	}
+	return err;
+}
+
+static ssize_t trigin_attach_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	return cti_trig_op_parse(dev, CTI_CHAN_ATTACH, CTI_TRIG_IN,
+				 buf, size);
+}
+static DEVICE_ATTR_WO(trigin_attach);
+
+static ssize_t trigin_detach_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	return cti_trig_op_parse(dev, CTI_CHAN_DETACH, CTI_TRIG_IN,
+				 buf, size);
+}
+static DEVICE_ATTR_WO(trigin_detach);
+
+static ssize_t trigout_attach_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	return cti_trig_op_parse(dev, CTI_CHAN_ATTACH, CTI_TRIG_OUT,
+				 buf, size);
+}
+static DEVICE_ATTR_WO(trigout_attach);
+
+static ssize_t trigout_detach_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	return cti_trig_op_parse(dev, CTI_CHAN_DETACH, CTI_TRIG_OUT,
+				 buf, size);
+}
+static DEVICE_ATTR_WO(trigout_detach);
+
+
+static ssize_t chan_gate_enable_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t size)
+{
+	int err = 0, channel = 0;
+
+	if (kstrtoint(buf, 0, &channel))
+		return -EINVAL;
+
+	err = cti_channel_gate_op(dev, CTI_GATE_CHAN_ENABLE, channel);
+	return err ? err : size;
+}
+
+static ssize_t chan_gate_enable_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *cfg = &drvdata->config;
+	unsigned long ctigate_bitmask = cfg->ctigate;
+	int size = 0;
+
+	if (cfg->ctigate == 0)
+		size = sprintf(buf, "\n");
+	else
+		size = bitmap_print_to_pagebuf(true, buf, &ctigate_bitmask,
+					       cfg->nr_ctm_channels);
+	return size;
+}
+static DEVICE_ATTR_RW(chan_gate_enable);
+
+static ssize_t chan_gate_disable_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t size)
+{
+	int err = 0, channel = 0;
+
+	if (kstrtoint(buf, 0, &channel))
+		return -EINVAL;
+
+	err = cti_channel_gate_op(dev, CTI_GATE_CHAN_DISABLE, channel);
+	return err ? err : size;
+}
+static DEVICE_ATTR_WO(chan_gate_disable);
+
+static int
+chan_op_parse(struct device *dev, enum cti_chan_set_op op, const char *buf)
+{
+	int err = 0, channel = 0;
+
+	if (kstrtoint(buf, 0, &channel))
+		return -EINVAL;
+
+	err = cti_channel_setop(dev, op, channel);
+	return err;
+
+}
+
+static ssize_t chan_set_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int err = chan_op_parse(dev, CTI_CHAN_SET, buf);
+
+	return err ? err : size;
+}
+static DEVICE_ATTR_WO(chan_set);
+
+static ssize_t chan_clear_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int err = chan_op_parse(dev, CTI_CHAN_CLR, buf);
+
+	return err ? err : size;
+}
+static DEVICE_ATTR_WO(chan_clear);
+
+static ssize_t chan_pulse_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int err = chan_op_parse(dev, CTI_CHAN_PULSE, buf);
+
+	return err ? err : size;
+}
+static DEVICE_ATTR_WO(chan_pulse);
+
+static ssize_t trig_filter_enable_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	u32 val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	val = drvdata->config.trig_filter_enable;
+	spin_unlock(&drvdata->spinlock);
+	return sprintf(buf, "%d\n", val);
+}
+
+static ssize_t trig_filter_enable_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t size)
+{
+	unsigned long val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->config.trig_filter_enable = !!val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(trig_filter_enable);
+
+static ssize_t trigout_filtered_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *cfg = &drvdata->config;
+	int size = 0, nr_trig_max = cfg->nr_trig_max;
+	unsigned long mask = cfg->trig_out_filter;
+
+	if (mask)
+		size = bitmap_print_to_pagebuf(true, buf, &mask, nr_trig_max);
+	return size;
+}
+static DEVICE_ATTR_RO(trigout_filtered);
+
+/* clear all xtrigger / channel programming */
+static ssize_t chan_xtrigs_reset_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t size)
+{
+	int i;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+
+	/* clear the CTI trigger / channel programming registers */
+	for (i = 0; i < config->nr_trig_max; i++) {
+		config->ctiinen[i] = 0;
+		config->ctiouten[i] = 0;
+	}
+
+	/* clear the other regs */
+	config->ctigate = GENMASK(config->nr_ctm_channels - 1, 0);
+	config->asicctl = 0;
+	config->ctiappset = 0;
+	config->ctiinout_sel = 0;
+	config->xtrig_rchan_sel = 0;
+
+	/* if enabled then write through */
+	if (cti_active(config))
+		cti_write_all_hw_regs(drvdata);
+
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_WO(chan_xtrigs_reset);
+
+/*
+ * Write to select a channel to view, read to display the
+ * cross triggers for the selected channel.
+ */
+static ssize_t chan_xtrigs_sel_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+	if (val > (drvdata->config.nr_ctm_channels - 1))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->config.xtrig_rchan_sel = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+
+static ssize_t chan_xtrigs_sel_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	unsigned long val;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	val = drvdata->config.xtrig_rchan_sel;
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%ld\n", val);
+}
+static DEVICE_ATTR_RW(chan_xtrigs_sel);
+
+static ssize_t chan_xtrigs_in_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *cfg = &drvdata->config;
+	int used = 0, reg_idx;
+	int nr_trig_max = drvdata->config.nr_trig_max;
+	u32 chan_mask = BIT(cfg->xtrig_rchan_sel);
+
+	for (reg_idx = 0; reg_idx < nr_trig_max; reg_idx++) {
+		if (chan_mask & cfg->ctiinen[reg_idx])
+			used += sprintf(buf + used, "%d ", reg_idx);
+	}
+
+	used += sprintf(buf + used, "\n");
+	return used;
+}
+static DEVICE_ATTR_RO(chan_xtrigs_in);
+
+static ssize_t chan_xtrigs_out_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *cfg = &drvdata->config;
+	int used = 0, reg_idx;
+	int nr_trig_max = drvdata->config.nr_trig_max;
+	u32 chan_mask = BIT(cfg->xtrig_rchan_sel);
+
+	for (reg_idx = 0; reg_idx < nr_trig_max; reg_idx++) {
+		if (chan_mask & cfg->ctiouten[reg_idx])
+			used += sprintf(buf + used, "%d ", reg_idx);
+	}
+
+	used += sprintf(buf + used, "\n");
+	return used;
+}
+static DEVICE_ATTR_RO(chan_xtrigs_out);
+
+static ssize_t print_chan_list(struct device *dev,
+			       char *buf, bool inuse)
+{
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *config = &drvdata->config;
+	int size, i;
+	unsigned long inuse_bits = 0, chan_mask;
+
+	/* scan regs to get bitmap of channels in use. */
+	spin_lock(&drvdata->spinlock);
+	for (i = 0; i < config->nr_trig_max; i++) {
+		inuse_bits |= config->ctiinen[i];
+		inuse_bits |= config->ctiouten[i];
+	}
+	spin_unlock(&drvdata->spinlock);
+
+	/* inverse bits if printing free channels */
+	if (!inuse)
+		inuse_bits = ~inuse_bits;
+
+	/* list of channels, or 'none' */
+	chan_mask = GENMASK(config->nr_ctm_channels - 1, 0);
+	if (inuse_bits & chan_mask)
+		size = bitmap_print_to_pagebuf(true, buf, &inuse_bits,
+					       config->nr_ctm_channels);
+	else
+		size = sprintf(buf, "\n");
+	return size;
+}
+
+static ssize_t chan_inuse_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	return print_chan_list(dev, buf, true);
+}
+static DEVICE_ATTR_RO(chan_inuse);
+
+static ssize_t chan_free_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	return print_chan_list(dev, buf, false);
+}
+static DEVICE_ATTR_RO(chan_free);
+
+static struct attribute *coresight_cti_channel_attrs[] = {
+	&dev_attr_trigin_attach.attr,
+	&dev_attr_trigin_detach.attr,
+	&dev_attr_trigout_attach.attr,
+	&dev_attr_trigout_detach.attr,
+	&dev_attr_trig_filter_enable.attr,
+	&dev_attr_trigout_filtered.attr,
+	&dev_attr_chan_gate_enable.attr,
+	&dev_attr_chan_gate_disable.attr,
+	&dev_attr_chan_set.attr,
+	&dev_attr_chan_clear.attr,
+	&dev_attr_chan_pulse.attr,
+	&dev_attr_chan_inuse.attr,
+	&dev_attr_chan_free.attr,
+	&dev_attr_chan_xtrigs_sel.attr,
+	&dev_attr_chan_xtrigs_in.attr,
+	&dev_attr_chan_xtrigs_out.attr,
+	&dev_attr_chan_xtrigs_reset.attr,
+	NULL,
+};
+
+/* Create the connections trigger groups and attrs dynamically */
+/*
+ * Each connection has dynamic group triggers<N> + name, trigin/out sigs/types
+ * attributes, + each device has static nr_trigger_cons giving the number
+ * of groups. e.g. in sysfs:-
+ * /cti_<name>/triggers0
+ * /cti_<name>/triggers1
+ * /cti_<name>/nr_trigger_cons
+ * where nr_trigger_cons = 2
+ */
+static ssize_t con_name_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	struct dev_ext_attribute *ext_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	struct cti_trig_con *con = (struct cti_trig_con *)ext_attr->var;
+
+	return sprintf(buf, "%s\n", con->con_dev_name);
+}
+
+static ssize_t trigin_sig_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	struct dev_ext_attribute *ext_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	struct cti_trig_con *con = (struct cti_trig_con *)ext_attr->var;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *cfg = &drvdata->config;
+	unsigned long mask = con->con_in->used_mask;
+
+	return bitmap_print_to_pagebuf(true, buf, &mask, cfg->nr_trig_max);
+}
+
+static ssize_t trigout_sig_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct dev_ext_attribute *ext_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	struct cti_trig_con *con = (struct cti_trig_con *)ext_attr->var;
+	struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct cti_config *cfg = &drvdata->config;
+	unsigned long mask = con->con_out->used_mask;
+
+	return bitmap_print_to_pagebuf(true, buf, &mask, cfg->nr_trig_max);
+}
+
+/* convert a sig type id to a name */
+static const char *
+cti_sig_type_name(struct cti_trig_con *con, int used_count, bool in)
+{
+	int idx = 0;
+	struct cti_trig_grp *grp = in ? con->con_in : con->con_out;
+
+	if (used_count < grp->nr_sigs)
+		idx = grp->sig_types[used_count];
+	return sig_type_names[idx];
+}
+
+static ssize_t trigin_type_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct dev_ext_attribute *ext_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	struct cti_trig_con *con = (struct cti_trig_con *)ext_attr->var;
+	int sig_idx, used = 0;
+	const char *name;
+
+	for (sig_idx = 0; sig_idx < con->con_in->nr_sigs; sig_idx++) {
+		name = cti_sig_type_name(con, sig_idx, true);
+		used += sprintf(buf + used, "%s ", name);
+	}
+	used += sprintf(buf + used, "\n");
+	return used;
+}
+
+static ssize_t trigout_type_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct dev_ext_attribute *ext_attr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	struct cti_trig_con *con = (struct cti_trig_con *)ext_attr->var;
+	int sig_idx, used = 0;
+	const char *name;
+
+	for (sig_idx = 0; sig_idx < con->con_out->nr_sigs; sig_idx++) {
+		name = cti_sig_type_name(con, sig_idx, false);
+		used += sprintf(buf + used, "%s ", name);
+	}
+	used += sprintf(buf + used, "\n");
+	return used;
+}
+
+/*
+ * Array of show function names declared above to allow selection
+ * for the connection attributes
+ */
+static p_show_fn show_fns[CTI_CON_ATTR_MAX] = {
+	con_name_show,
+	trigin_sig_show,
+	trigout_sig_show,
+	trigin_type_show,
+	trigout_type_show,
+};
+
+static int cti_create_con_sysfs_attr(struct device *dev,
+				     struct cti_trig_con *con,
+				     enum cti_conn_attr_type attr_type,
+				     int attr_idx)
+{
+	struct dev_ext_attribute *eattr;
+	char *name;
+
+	eattr = devm_kzalloc(dev, sizeof(struct dev_ext_attribute),
+				    GFP_KERNEL);
+	if (eattr) {
+		name = devm_kstrdup(dev, con_attr_names[attr_type],
+				    GFP_KERNEL);
+		if (name) {
+			/* fill out the underlying attribute struct */
+			eattr->attr.attr.name = name;
+			eattr->attr.attr.mode = 0444;
+
+			/* now the device_attribute struct */
+			eattr->attr.show = show_fns[attr_type];
+		} else {
+			return -ENOMEM;
+		}
+	} else {
+		return -ENOMEM;
+	}
+	eattr->var = con;
+	con->con_attrs[attr_idx] = &eattr->attr.attr;
+	/*
+	 * Initialize the dynamically allocated attribute
+	 * to avoid LOCKDEP splat. See include/linux/sysfs.h
+	 * for more details.
+	 */
+	sysfs_attr_init(con->con_attrs[attr_idx]);
+
+	return 0;
+}
+
+static struct attribute_group *
+cti_create_con_sysfs_group(struct device *dev, struct cti_device *ctidev,
+			   int con_idx, struct cti_trig_con *tc)
+{
+	struct attribute_group *group = NULL;
+	int grp_idx;
+
+	group = devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL);
+	if (!group)
+		return NULL;
+
+	group->name = devm_kasprintf(dev, GFP_KERNEL, "triggers%d", con_idx);
+	if (!group->name)
+		return NULL;
+
+	grp_idx = con_idx + CORESIGHT_CTI_STATIC_GROUPS_MAX - 1;
+	ctidev->con_groups[grp_idx] = group;
+	tc->attr_group = group;
+	return group;
+}
+
+/* create a triggers connection group and the attributes for that group */
+static int cti_create_con_attr_set(struct device *dev, int con_idx,
+				   struct cti_device *ctidev,
+				   struct cti_trig_con *tc)
+{
+	struct attribute_group *attr_group = NULL;
+	int attr_idx = 0;
+	int err = -ENOMEM;
+
+	attr_group = cti_create_con_sysfs_group(dev, ctidev, con_idx, tc);
+	if (!attr_group)
+		return -ENOMEM;
+
+	/* allocate NULL terminated array of attributes */
+	tc->con_attrs = devm_kcalloc(dev, CTI_CON_ATTR_MAX + 1,
+				     sizeof(struct attribute *), GFP_KERNEL);
+	if (!tc->con_attrs)
+		return -ENOMEM;
+
+	err = cti_create_con_sysfs_attr(dev, tc, CTI_CON_ATTR_NAME,
+					attr_idx++);
+	if (err)
+		return err;
+
+	if (tc->con_in->nr_sigs > 0) {
+		err = cti_create_con_sysfs_attr(dev, tc,
+						CTI_CON_ATTR_TRIGIN_SIG,
+						attr_idx++);
+		if (err)
+			return err;
+
+		err = cti_create_con_sysfs_attr(dev, tc,
+						CTI_CON_ATTR_TRIGIN_TYPES,
+						attr_idx++);
+		if (err)
+			return err;
+	}
+
+	if (tc->con_out->nr_sigs > 0) {
+		err = cti_create_con_sysfs_attr(dev, tc,
+						CTI_CON_ATTR_TRIGOUT_SIG,
+						attr_idx++);
+		if (err)
+			return err;
+
+		err = cti_create_con_sysfs_attr(dev, tc,
+						CTI_CON_ATTR_TRIGOUT_TYPES,
+						attr_idx++);
+		if (err)
+			return err;
+	}
+	attr_group->attrs = tc->con_attrs;
+	return 0;
+}
+
+/* create the array of group pointers for the CTI sysfs groups */
+static int cti_create_cons_groups(struct device *dev, struct cti_device *ctidev)
+{
+	int nr_groups;
+
+	/* nr groups = dynamic + static + NULL terminator */
+	nr_groups = ctidev->nr_trig_con + CORESIGHT_CTI_STATIC_GROUPS_MAX;
+	ctidev->con_groups = devm_kcalloc(dev, nr_groups,
+					  sizeof(struct attribute_group *),
+					  GFP_KERNEL);
+	if (!ctidev->con_groups)
+		return -ENOMEM;
+	return 0;
+}
+
+int cti_create_cons_sysfs(struct device *dev, struct cti_drvdata *drvdata)
+{
+	struct cti_device *ctidev = &drvdata->ctidev;
+	int err, con_idx = 0, i;
+	struct cti_trig_con *tc;
+
+	err = cti_create_cons_groups(dev, ctidev);
+	if (err)
+		return err;
+
+	/* populate first locations with the static set of groups */
+	for (i = 0; i < (CORESIGHT_CTI_STATIC_GROUPS_MAX - 1); i++)
+		ctidev->con_groups[i] = coresight_cti_groups[i];
+
+	/* add dynamic set for each connection */
+	list_for_each_entry(tc, &ctidev->trig_cons, node) {
+		err = cti_create_con_attr_set(dev, con_idx++, ctidev, tc);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+/* attribute and group sysfs tables. */
+static const struct attribute_group coresight_cti_group = {
+	.attrs = coresight_cti_attrs,
+};
+
+static const struct attribute_group coresight_cti_mgmt_group = {
+	.attrs = coresight_cti_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group coresight_cti_regs_group = {
+	.attrs = coresight_cti_regs_attrs,
+	.name = "regs",
+};
+
+static const struct attribute_group coresight_cti_channels_group = {
+	.attrs = coresight_cti_channel_attrs,
+	.name = "channels",
+};
+
+const struct attribute_group *
+coresight_cti_groups[CORESIGHT_CTI_STATIC_GROUPS_MAX] = {
+	&coresight_cti_group,
+	&coresight_cti_mgmt_group,
+	&coresight_cti_regs_group,
+	&coresight_cti_channels_group,
+	NULL,
+};
diff --git a/drivers/hwtracing/coresight/coresight-cti.h b/drivers/hwtracing/coresight/coresight-cti.h
new file mode 100644
index 0000000000..cb9ee616d0
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cti.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_CTI_H
+#define _CORESIGHT_CORESIGHT_CTI_H
+
+#include <linux/coresight.h>
+#include <linux/device.h>
+#include <linux/fwnode.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include "coresight-priv.h"
+
+/*
+ * Device registers
+ * 0x000 - 0x144: CTI programming and status
+ * 0xEDC - 0xEF8: CTI integration test.
+ * 0xF00 - 0xFFC: Coresight management registers.
+ */
+/* CTI programming registers */
+#define CTICONTROL		0x000
+#define CTIINTACK		0x010
+#define CTIAPPSET		0x014
+#define CTIAPPCLEAR		0x018
+#define CTIAPPPULSE		0x01C
+#define CTIINEN(n)		(0x020 + (4 * n))
+#define CTIOUTEN(n)		(0x0A0 + (4 * n))
+#define CTITRIGINSTATUS		0x130
+#define CTITRIGOUTSTATUS	0x134
+#define CTICHINSTATUS		0x138
+#define CTICHOUTSTATUS		0x13C
+#define CTIGATE			0x140
+#define ASICCTL			0x144
+/* Integration test registers */
+#define ITCHINACK		0xEDC /* WO CTI CSSoc 400 only*/
+#define ITTRIGINACK		0xEE0 /* WO CTI CSSoc 400 only*/
+#define ITCHOUT			0xEE4 /* WO RW-600 */
+#define ITTRIGOUT		0xEE8 /* WO RW-600 */
+#define ITCHOUTACK		0xEEC /* RO CTI CSSoc 400 only*/
+#define ITTRIGOUTACK		0xEF0 /* RO CTI CSSoc 400 only*/
+#define ITCHIN			0xEF4 /* RO */
+#define ITTRIGIN		0xEF8 /* RO */
+/* management registers */
+#define CTIDEVAFF0		0xFA8
+#define CTIDEVAFF1		0xFAC
+
+/*
+ * CTI CSSoc 600 has a max of 32 trigger signals per direction.
+ * CTI CSSoc 400 has 8 IO triggers - other CTIs can be impl def.
+ * Max of in and out defined in the DEVID register.
+ * - pick up actual number used from .dts parameters if present.
+ */
+#define CTIINOUTEN_MAX		32
+
+/**
+ * Group of related trigger signals
+ *
+ * @nr_sigs: number of signals in the group.
+ * @used_mask: bitmask representing the signal indexes in the group.
+ * @sig_types: array of types for the signals, length nr_sigs.
+ */
+struct cti_trig_grp {
+	int nr_sigs;
+	u32 used_mask;
+	int sig_types[];
+};
+
+/**
+ * Trigger connection - connection between a CTI and other (coresight) device
+ * lists input and output trigger signals for the device
+ *
+ * @con_in: connected CTIIN signals for the device.
+ * @con_out: connected CTIOUT signals for the device.
+ * @con_dev: coresight device connected to the CTI, NULL if not CS device
+ * @con_dev_name: name of connected device (CS or CPU)
+ * @node: entry node in list of connections.
+ * @con_attrs: Dynamic sysfs attributes specific to this connection.
+ * @attr_group: Dynamic attribute group created for this connection.
+ */
+struct cti_trig_con {
+	struct cti_trig_grp *con_in;
+	struct cti_trig_grp *con_out;
+	struct coresight_device *con_dev;
+	const char *con_dev_name;
+	struct list_head node;
+	struct attribute **con_attrs;
+	struct attribute_group *attr_group;
+};
+
+/**
+ * struct cti_device - description of CTI device properties.
+ *
+ * @nt_trig_con: Number of external devices connected to this device.
+ * @ctm_id: which CTM this device is connected to (by default it is
+ *          assumed there is a single CTM per SoC, ID 0).
+ * @trig_cons: list of connections to this device.
+ * @cpu: CPU ID if associated with CPU, -1 otherwise.
+ * @con_groups: combined static and dynamic sysfs groups for trigger
+ *		connections.
+ */
+struct cti_device {
+	int nr_trig_con;
+	u32 ctm_id;
+	struct list_head trig_cons;
+	int cpu;
+	const struct attribute_group **con_groups;
+};
+
+/**
+ * struct cti_config - configuration of the CTI device hardware
+ *
+ * @nr_trig_max: Max number of trigger signals implemented on device.
+ *		 (max of trig_in or trig_out) - from ID register.
+ * @nr_ctm_channels: number of available CTM channels - from ID register.
+ * @enable_req_count: CTI is enabled alongside >=1 associated devices.
+ * @hw_enabled: true if hw is currently enabled.
+ * @hw_powered: true if associated cpu powered on, or no cpu.
+ * @trig_in_use: bitfield of in triggers registered as in use.
+ * @trig_out_use: bitfield of out triggers registered as in use.
+ * @trig_out_filter: bitfield of out triggers that are blocked if filter
+ *		     enabled. Typically this would be dbgreq / restart on
+ *		     a core CTI.
+ * @trig_filter_enable: 1 if filtering enabled.
+ * @xtrig_rchan_sel: channel selection for xtrigger connection show.
+ * @ctiappset: CTI Software application channel set.
+ * @ctiinout_sel: register selector for INEN and OUTEN regs.
+ * @ctiinen: enable input trigger to a channel.
+ * @ctiouten: enable output trigger from a channel.
+ * @ctigate: gate channel output from CTI to CTM.
+ * @asicctl: asic control register.
+ */
+struct cti_config {
+	/* hardware description */
+	int nr_ctm_channels;
+	int nr_trig_max;
+
+	/* cti enable control */
+	int enable_req_count;
+	bool hw_enabled;
+	bool hw_powered;
+
+	/* registered triggers and filtering */
+	u32 trig_in_use;
+	u32 trig_out_use;
+	u32 trig_out_filter;
+	bool trig_filter_enable;
+	u8 xtrig_rchan_sel;
+
+	/* cti cross trig programmable regs */
+	u32 ctiappset;
+	u8 ctiinout_sel;
+	u32 ctiinen[CTIINOUTEN_MAX];
+	u32 ctiouten[CTIINOUTEN_MAX];
+	u32 ctigate;
+	u32 asicctl;
+};
+
+/**
+ * struct cti_drvdata - specifics for the CTI device
+ * @base:	Memory mapped base address for this component..
+ * @csdev:	Standard CoreSight device information.
+ * @ctidev:	Extra information needed by the CTI/CTM framework.
+ * @spinlock:	Control data access to one at a time.
+ * @config:	Configuration data for this CTI device.
+ * @node:	List entry of this device in the list of CTI devices.
+ * @csdev_release: release function for underlying coresight_device.
+ */
+struct cti_drvdata {
+	void __iomem *base;
+	struct coresight_device	*csdev;
+	struct cti_device ctidev;
+	spinlock_t spinlock;
+	struct cti_config config;
+	struct list_head node;
+	void (*csdev_release)(struct device *dev);
+};
+
+/*
+ * Channel operation types.
+ */
+enum cti_chan_op {
+	CTI_CHAN_ATTACH,
+	CTI_CHAN_DETACH,
+};
+
+enum cti_trig_dir {
+	CTI_TRIG_IN,
+	CTI_TRIG_OUT,
+};
+
+enum cti_chan_gate_op {
+	CTI_GATE_CHAN_ENABLE,
+	CTI_GATE_CHAN_DISABLE,
+};
+
+enum cti_chan_set_op {
+	CTI_CHAN_SET,
+	CTI_CHAN_CLR,
+	CTI_CHAN_PULSE,
+};
+
+/* private cti driver fns & vars */
+extern const struct attribute_group *coresight_cti_groups[];
+int cti_add_default_connection(struct device *dev,
+			       struct cti_drvdata *drvdata);
+int cti_add_connection_entry(struct device *dev, struct cti_drvdata *drvdata,
+			     struct cti_trig_con *tc,
+			     struct coresight_device *csdev,
+			     const char *assoc_dev_name);
+struct cti_trig_con *cti_allocate_trig_con(struct device *dev, int in_sigs,
+					   int out_sigs);
+int cti_enable(struct coresight_device *csdev, enum cs_mode mode, void *data);
+int cti_disable(struct coresight_device *csdev, void *data);
+void cti_write_all_hw_regs(struct cti_drvdata *drvdata);
+void cti_write_intack(struct device *dev, u32 ackval);
+void cti_write_single_reg(struct cti_drvdata *drvdata, int offset, u32 value);
+int cti_channel_trig_op(struct device *dev, enum cti_chan_op op,
+			enum cti_trig_dir direction, u32 channel_idx,
+			u32 trigger_idx);
+int cti_channel_gate_op(struct device *dev, enum cti_chan_gate_op op,
+			u32 channel_idx);
+int cti_channel_setop(struct device *dev, enum cti_chan_set_op op,
+		      u32 channel_idx);
+int cti_create_cons_sysfs(struct device *dev, struct cti_drvdata *drvdata);
+struct coresight_platform_data *
+coresight_cti_get_platform_data(struct device *dev);
+const char *cti_plat_get_node_name(struct fwnode_handle *fwnode);
+
+/* cti powered and enabled */
+static inline bool cti_active(struct cti_config *cfg)
+{
+	return cfg->hw_powered && cfg->hw_enabled;
+}
+
+#endif  /* _CORESIGHT_CORESIGHT_CTI_H */
diff --git a/drivers/hwtracing/coresight/coresight-dummy.c b/drivers/hwtracing/coresight/coresight-dummy.c
new file mode 100644
index 0000000000..e4deafae7b
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-dummy.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/coresight.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "coresight-priv.h"
+
+struct dummy_drvdata {
+	struct device			*dev;
+	struct coresight_device		*csdev;
+};
+
+DEFINE_CORESIGHT_DEVLIST(source_devs, "dummy_source");
+DEFINE_CORESIGHT_DEVLIST(sink_devs, "dummy_sink");
+
+static int dummy_source_enable(struct coresight_device *csdev,
+			       struct perf_event *event, enum cs_mode mode)
+{
+	dev_dbg(csdev->dev.parent, "Dummy source enabled\n");
+
+	return 0;
+}
+
+static void dummy_source_disable(struct coresight_device *csdev,
+				 struct perf_event *event)
+{
+	dev_dbg(csdev->dev.parent, "Dummy source disabled\n");
+}
+
+static int dummy_sink_enable(struct coresight_device *csdev, enum cs_mode mode,
+				void *data)
+{
+	dev_dbg(csdev->dev.parent, "Dummy sink enabled\n");
+
+	return 0;
+}
+
+static int dummy_sink_disable(struct coresight_device *csdev)
+{
+	dev_dbg(csdev->dev.parent, "Dummy sink disabled\n");
+
+	return 0;
+}
+
+static const struct coresight_ops_source dummy_source_ops = {
+	.enable	= dummy_source_enable,
+	.disable = dummy_source_disable,
+};
+
+static const struct coresight_ops dummy_source_cs_ops = {
+	.source_ops = &dummy_source_ops,
+};
+
+static const struct coresight_ops_sink dummy_sink_ops = {
+	.enable	= dummy_sink_enable,
+	.disable = dummy_sink_disable,
+};
+
+static const struct coresight_ops dummy_sink_cs_ops = {
+	.sink_ops = &dummy_sink_ops,
+};
+
+static int dummy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct coresight_platform_data *pdata;
+	struct dummy_drvdata *drvdata;
+	struct coresight_desc desc = { 0 };
+
+	if (of_device_is_compatible(node, "arm,coresight-dummy-source")) {
+
+		desc.name = coresight_alloc_device_name(&source_devs, dev);
+		if (!desc.name)
+			return -ENOMEM;
+
+		desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+		desc.subtype.source_subtype =
+					CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS;
+		desc.ops = &dummy_source_cs_ops;
+	} else if (of_device_is_compatible(node, "arm,coresight-dummy-sink")) {
+		desc.name = coresight_alloc_device_name(&sink_devs, dev);
+		if (!desc.name)
+			return -ENOMEM;
+
+		desc.type = CORESIGHT_DEV_TYPE_SINK;
+		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_DUMMY;
+		desc.ops = &dummy_sink_cs_ops;
+	} else {
+		dev_err(dev, "Device type not set\n");
+		return -EINVAL;
+	}
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+	pdev->dev.platform_data = pdata;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &pdev->dev;
+	platform_set_drvdata(pdev, drvdata);
+
+	desc.pdata = pdev->dev.platform_data;
+	desc.dev = &pdev->dev;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	pm_runtime_enable(dev);
+	dev_dbg(dev, "Dummy device initialized\n");
+
+	return 0;
+}
+
+static int dummy_remove(struct platform_device *pdev)
+{
+	struct dummy_drvdata *drvdata = platform_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+
+	pm_runtime_disable(dev);
+	coresight_unregister(drvdata->csdev);
+	return 0;
+}
+
+static const struct of_device_id dummy_match[] = {
+	{.compatible = "arm,coresight-dummy-source"},
+	{.compatible = "arm,coresight-dummy-sink"},
+	{},
+};
+
+static struct platform_driver dummy_driver = {
+	.probe	= dummy_probe,
+	.remove	= dummy_remove,
+	.driver	= {
+		.name   = "coresight-dummy",
+		.of_match_table = dummy_match,
+	},
+};
+
+module_platform_driver(dummy_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CoreSight dummy driver");
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
new file mode 100644
index 0000000000..fa80039e08
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -0,0 +1,863 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Embedded Trace Buffer driver
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
+#include <linux/circ_buf.h>
+#include <linux/mm.h>
+#include <linux/perf_event.h>
+
+
+#include "coresight-priv.h"
+#include "coresight-etm-perf.h"
+
+#define ETB_RAM_DEPTH_REG	0x004
+#define ETB_STATUS_REG		0x00c
+#define ETB_RAM_READ_DATA_REG	0x010
+#define ETB_RAM_READ_POINTER	0x014
+#define ETB_RAM_WRITE_POINTER	0x018
+#define ETB_TRG			0x01c
+#define ETB_CTL_REG		0x020
+#define ETB_RWD_REG		0x024
+#define ETB_FFSR		0x300
+#define ETB_FFCR		0x304
+#define ETB_ITMISCOP0		0xee0
+#define ETB_ITTRFLINACK		0xee4
+#define ETB_ITTRFLIN		0xee8
+#define ETB_ITATBDATA0		0xeeC
+#define ETB_ITATBCTR2		0xef0
+#define ETB_ITATBCTR1		0xef4
+#define ETB_ITATBCTR0		0xef8
+
+/* register description */
+/* STS - 0x00C */
+#define ETB_STATUS_RAM_FULL	BIT(0)
+/* CTL - 0x020 */
+#define ETB_CTL_CAPT_EN		BIT(0)
+/* FFCR - 0x304 */
+#define ETB_FFCR_EN_FTC		BIT(0)
+#define ETB_FFCR_FON_MAN	BIT(6)
+#define ETB_FFCR_STOP_FI	BIT(12)
+#define ETB_FFCR_STOP_TRIGGER	BIT(13)
+
+#define ETB_FFCR_BIT		6
+#define ETB_FFSR_BIT		1
+#define ETB_FRAME_SIZE_WORDS	4
+
+DEFINE_CORESIGHT_DEVLIST(etb_devs, "etb");
+
+/**
+ * struct etb_drvdata - specifics associated to an ETB component
+ * @base:	memory mapped base address for this component.
+ * @atclk:	optional clock for the core parts of the ETB.
+ * @csdev:	component vitals needed by the framework.
+ * @miscdev:	specifics to handle "/dev/xyz.etb" entry.
+ * @spinlock:	only one at a time pls.
+ * @reading:	synchronise user space access to etb buffer.
+ * @pid:	Process ID of the process being monitored by the session
+ *		that is using this component.
+ * @buf:	area of memory where ETB buffer content gets sent.
+ * @mode:	this ETB is being used.
+ * @buffer_depth: size of @buf.
+ * @trigger_cntr: amount of words to store after a trigger.
+ */
+struct etb_drvdata {
+	void __iomem		*base;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+	struct miscdevice	miscdev;
+	spinlock_t		spinlock;
+	local_t			reading;
+	pid_t			pid;
+	u8			*buf;
+	u32			mode;
+	u32			buffer_depth;
+	u32			trigger_cntr;
+};
+
+static int etb_set_buffer(struct coresight_device *csdev,
+			  struct perf_output_handle *handle);
+
+static inline unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata)
+{
+	return readl_relaxed(drvdata->base + ETB_RAM_DEPTH_REG);
+}
+
+static void __etb_enable_hw(struct etb_drvdata *drvdata)
+{
+	int i;
+	u32 depth;
+
+	CS_UNLOCK(drvdata->base);
+
+	depth = drvdata->buffer_depth;
+	/* reset write RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
+	/* clear entire RAM buffer */
+	for (i = 0; i < depth; i++)
+		writel_relaxed(0x0, drvdata->base + ETB_RWD_REG);
+
+	/* reset write RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
+	/* reset read RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER);
+
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + ETB_TRG);
+	writel_relaxed(ETB_FFCR_EN_FTC | ETB_FFCR_STOP_TRIGGER,
+		       drvdata->base + ETB_FFCR);
+	/* ETB trace capture enable */
+	writel_relaxed(ETB_CTL_CAPT_EN, drvdata->base + ETB_CTL_REG);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int etb_enable_hw(struct etb_drvdata *drvdata)
+{
+	int rc = coresight_claim_device(drvdata->csdev);
+
+	if (rc)
+		return rc;
+
+	__etb_enable_hw(drvdata);
+	return 0;
+}
+
+static int etb_enable_sysfs(struct coresight_device *csdev)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't messup with perf sessions. */
+	if (drvdata->mode == CS_MODE_PERF) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (drvdata->mode == CS_MODE_DISABLED) {
+		ret = etb_enable_hw(drvdata);
+		if (ret)
+			goto out;
+
+		drvdata->mode = CS_MODE_SYSFS;
+	}
+
+	atomic_inc(&csdev->refcnt);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return ret;
+}
+
+static int etb_enable_perf(struct coresight_device *csdev, void *data)
+{
+	int ret = 0;
+	pid_t pid;
+	unsigned long flags;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* No need to continue if the component is already in used by sysFS. */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Get a handle on the pid of the process to monitor */
+	pid = buf->pid;
+
+	if (drvdata->pid != -1 && drvdata->pid != pid) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * No HW configuration is needed if the sink is already in
+	 * use for this session.
+	 */
+	if (drvdata->pid == pid) {
+		atomic_inc(&csdev->refcnt);
+		goto out;
+	}
+
+	/*
+	 * We don't have an internal state to clean up if we fail to setup
+	 * the perf buffer. So we can perform the step before we turn the
+	 * ETB on and leave without cleaning up.
+	 */
+	ret = etb_set_buffer(csdev, handle);
+	if (ret)
+		goto out;
+
+	ret = etb_enable_hw(drvdata);
+	if (!ret) {
+		/* Associate with monitored process. */
+		drvdata->pid = pid;
+		drvdata->mode = CS_MODE_PERF;
+		atomic_inc(&csdev->refcnt);
+	}
+
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return ret;
+}
+
+static int etb_enable(struct coresight_device *csdev, enum cs_mode mode,
+		      void *data)
+{
+	int ret;
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = etb_enable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = etb_enable_perf(csdev, data);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		return ret;
+
+	dev_dbg(&csdev->dev, "ETB enabled\n");
+	return 0;
+}
+
+static void __etb_disable_hw(struct etb_drvdata *drvdata)
+{
+	u32 ffcr;
+	struct device *dev = &drvdata->csdev->dev;
+	struct csdev_access *csa = &drvdata->csdev->access;
+
+	CS_UNLOCK(drvdata->base);
+
+	ffcr = readl_relaxed(drvdata->base + ETB_FFCR);
+	/* stop formatter when a stop has completed */
+	ffcr |= ETB_FFCR_STOP_FI;
+	writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
+	/* manually generate a flush of the system */
+	ffcr |= ETB_FFCR_FON_MAN;
+	writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
+
+	if (coresight_timeout(csa, ETB_FFCR, ETB_FFCR_BIT, 0)) {
+		dev_err(dev,
+		"timeout while waiting for completion of Manual Flush\n");
+	}
+
+	/* disable trace capture */
+	writel_relaxed(0x0, drvdata->base + ETB_CTL_REG);
+
+	if (coresight_timeout(csa, ETB_FFSR, ETB_FFSR_BIT, 1)) {
+		dev_err(dev,
+			"timeout while waiting for Formatter to Stop\n");
+	}
+
+	CS_LOCK(drvdata->base);
+}
+
+static void etb_dump_hw(struct etb_drvdata *drvdata)
+{
+	bool lost = false;
+	int i;
+	u8 *buf_ptr;
+	u32 read_data, depth;
+	u32 read_ptr, write_ptr;
+	u32 frame_off, frame_endoff;
+	struct device *dev = &drvdata->csdev->dev;
+
+	CS_UNLOCK(drvdata->base);
+
+	read_ptr = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER);
+	write_ptr = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER);
+
+	frame_off = write_ptr % ETB_FRAME_SIZE_WORDS;
+	frame_endoff = ETB_FRAME_SIZE_WORDS - frame_off;
+	if (frame_off) {
+		dev_err(dev,
+			"write_ptr: %lu not aligned to formatter frame size\n",
+			(unsigned long)write_ptr);
+		dev_err(dev, "frameoff: %lu, frame_endoff: %lu\n",
+			(unsigned long)frame_off, (unsigned long)frame_endoff);
+		write_ptr += frame_endoff;
+	}
+
+	if ((readl_relaxed(drvdata->base + ETB_STATUS_REG)
+		      & ETB_STATUS_RAM_FULL) == 0) {
+		writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER);
+	} else {
+		writel_relaxed(write_ptr, drvdata->base + ETB_RAM_READ_POINTER);
+		lost = true;
+	}
+
+	depth = drvdata->buffer_depth;
+	buf_ptr = drvdata->buf;
+	for (i = 0; i < depth; i++) {
+		read_data = readl_relaxed(drvdata->base +
+					  ETB_RAM_READ_DATA_REG);
+		*(u32 *)buf_ptr = read_data;
+		buf_ptr += 4;
+	}
+
+	if (lost)
+		coresight_insert_barrier_packet(drvdata->buf);
+
+	if (frame_off) {
+		buf_ptr -= (frame_endoff * 4);
+		for (i = 0; i < frame_endoff; i++) {
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+		}
+	}
+
+	writel_relaxed(read_ptr, drvdata->base + ETB_RAM_READ_POINTER);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void etb_disable_hw(struct etb_drvdata *drvdata)
+{
+	__etb_disable_hw(drvdata);
+	etb_dump_hw(drvdata);
+	coresight_disclaim_device(drvdata->csdev);
+}
+
+static int etb_disable(struct coresight_device *csdev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	unsigned long flags;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	if (atomic_dec_return(&csdev->refcnt)) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
+	}
+
+	/* Complain if we (somehow) got out of sync */
+	WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED);
+	etb_disable_hw(drvdata);
+	/* Dissociate from monitored process. */
+	drvdata->pid = -1;
+	drvdata->mode = CS_MODE_DISABLED;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_dbg(&csdev->dev, "ETB disabled\n");
+	return 0;
+}
+
+static void *etb_alloc_buffer(struct coresight_device *csdev,
+			      struct perf_event *event, void **pages,
+			      int nr_pages, bool overwrite)
+{
+	int node;
+	struct cs_buffers *buf;
+
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+
+	buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->pid = task_pid_nr(event->owner);
+	buf->snapshot = overwrite;
+	buf->nr_pages = nr_pages;
+	buf->data_pages = pages;
+
+	return buf;
+}
+
+static void etb_free_buffer(void *config)
+{
+	struct cs_buffers *buf = config;
+
+	kfree(buf);
+}
+
+static int etb_set_buffer(struct coresight_device *csdev,
+			  struct perf_output_handle *handle)
+{
+	int ret = 0;
+	unsigned long head;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	if (!buf)
+		return -EINVAL;
+
+	/* wrap head around to the amount of space we have */
+	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+
+	/* find the page to write to */
+	buf->cur = head / PAGE_SIZE;
+
+	/* and offset within that page */
+	buf->offset = head % PAGE_SIZE;
+
+	local_set(&buf->data_size, 0);
+
+	return ret;
+}
+
+static unsigned long etb_update_buffer(struct coresight_device *csdev,
+			      struct perf_output_handle *handle,
+			      void *sink_config)
+{
+	bool lost = false;
+	int i, cur;
+	u8 *buf_ptr;
+	const u32 *barrier;
+	u32 read_ptr, write_ptr, capacity;
+	u32 status, read_data;
+	unsigned long offset, to_read = 0, flags;
+	struct cs_buffers *buf = sink_config;
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (!buf)
+		return 0;
+
+	capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't do anything if another tracer is using this sink */
+	if (atomic_read(&csdev->refcnt) != 1)
+		goto out;
+
+	__etb_disable_hw(drvdata);
+	CS_UNLOCK(drvdata->base);
+
+	/* unit is in words, not bytes */
+	read_ptr = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER);
+	write_ptr = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER);
+
+	/*
+	 * Entries should be aligned to the frame size.  If they are not
+	 * go back to the last alignment point to give decoding tools a
+	 * chance to fix things.
+	 */
+	if (write_ptr % ETB_FRAME_SIZE_WORDS) {
+		dev_err(&csdev->dev,
+			"write_ptr: %lu not aligned to formatter frame size\n",
+			(unsigned long)write_ptr);
+
+		write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1);
+		lost = true;
+	}
+
+	/*
+	 * Get a hold of the status register and see if a wrap around
+	 * has occurred.  If so adjust things accordingly.  Otherwise
+	 * start at the beginning and go until the write pointer has
+	 * been reached.
+	 */
+	status = readl_relaxed(drvdata->base + ETB_STATUS_REG);
+	if (status & ETB_STATUS_RAM_FULL) {
+		lost = true;
+		to_read = capacity;
+		read_ptr = write_ptr;
+	} else {
+		to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->buffer_depth);
+		to_read *= ETB_FRAME_SIZE_WORDS;
+	}
+
+	/*
+	 * Make sure we don't overwrite data that hasn't been consumed yet.
+	 * It is entirely possible that the HW buffer has more data than the
+	 * ring buffer can currently handle.  If so adjust the start address
+	 * to take only the last traces.
+	 *
+	 * In snapshot mode we are looking to get the latest traces only and as
+	 * such, we don't care about not overwriting data that hasn't been
+	 * processed by user space.
+	 */
+	if (!buf->snapshot && to_read > handle->size) {
+		u32 mask = ~(ETB_FRAME_SIZE_WORDS - 1);
+
+		/* The new read pointer must be frame size aligned */
+		to_read = handle->size & mask;
+		/*
+		 * Move the RAM read pointer up, keeping in mind that
+		 * everything is in frame size units.
+		 */
+		read_ptr = (write_ptr + drvdata->buffer_depth) -
+					to_read / ETB_FRAME_SIZE_WORDS;
+		/* Wrap around if need be*/
+		if (read_ptr > (drvdata->buffer_depth - 1))
+			read_ptr -= drvdata->buffer_depth;
+		/* let the decoder know we've skipped ahead */
+		lost = true;
+	}
+
+	/*
+	 * Don't set the TRUNCATED flag in snapshot mode because 1) the
+	 * captured buffer is expected to be truncated and 2) a full buffer
+	 * prevents the event from being re-enabled by the perf core,
+	 * resulting in stale data being send to user space.
+	 */
+	if (!buf->snapshot && lost)
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+
+	/* finally tell HW where we want to start reading from */
+	writel_relaxed(read_ptr, drvdata->base + ETB_RAM_READ_POINTER);
+
+	cur = buf->cur;
+	offset = buf->offset;
+	barrier = coresight_barrier_pkt;
+
+	for (i = 0; i < to_read; i += 4) {
+		buf_ptr = buf->data_pages[cur] + offset;
+		read_data = readl_relaxed(drvdata->base +
+					  ETB_RAM_READ_DATA_REG);
+		if (lost && i < CORESIGHT_BARRIER_PKT_SIZE) {
+			read_data = *barrier;
+			barrier++;
+		}
+
+		*(u32 *)buf_ptr = read_data;
+		buf_ptr += 4;
+
+		offset += 4;
+		if (offset >= PAGE_SIZE) {
+			offset = 0;
+			cur++;
+			/* wrap around at the end of the buffer */
+			cur &= buf->nr_pages - 1;
+		}
+	}
+
+	/* reset ETB buffer for next run */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER);
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
+
+	/*
+	 * In snapshot mode we simply increment the head by the number of byte
+	 * that were written.  User space will figure out how many bytes to get
+	 * from the AUX buffer based on the position of the head.
+	 */
+	if (buf->snapshot)
+		handle->head += to_read;
+
+	__etb_enable_hw(drvdata);
+	CS_LOCK(drvdata->base);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return to_read;
+}
+
+static const struct coresight_ops_sink etb_sink_ops = {
+	.enable		= etb_enable,
+	.disable	= etb_disable,
+	.alloc_buffer	= etb_alloc_buffer,
+	.free_buffer	= etb_free_buffer,
+	.update_buffer	= etb_update_buffer,
+};
+
+static const struct coresight_ops etb_cs_ops = {
+	.sink_ops	= &etb_sink_ops,
+};
+
+static void etb_dump(struct etb_drvdata *drvdata)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		__etb_disable_hw(drvdata);
+		etb_dump_hw(drvdata);
+		__etb_enable_hw(drvdata);
+	}
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_dbg(&drvdata->csdev->dev, "ETB dumped\n");
+}
+
+static int etb_open(struct inode *inode, struct file *file)
+{
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+
+	if (local_cmpxchg(&drvdata->reading, 0, 1))
+		return -EBUSY;
+
+	dev_dbg(&drvdata->csdev->dev, "%s: successfully opened\n", __func__);
+	return 0;
+}
+
+static ssize_t etb_read(struct file *file, char __user *data,
+				size_t len, loff_t *ppos)
+{
+	u32 depth;
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+	struct device *dev = &drvdata->csdev->dev;
+
+	etb_dump(drvdata);
+
+	depth = drvdata->buffer_depth;
+	if (*ppos + len > depth * 4)
+		len = depth * 4 - *ppos;
+
+	if (copy_to_user(data, drvdata->buf + *ppos, len)) {
+		dev_dbg(dev,
+			"%s: copy_to_user failed\n", __func__);
+		return -EFAULT;
+	}
+
+	*ppos += len;
+
+	dev_dbg(dev, "%s: %zu bytes copied, %d bytes left\n",
+		__func__, len, (int)(depth * 4 - *ppos));
+	return len;
+}
+
+static int etb_release(struct inode *inode, struct file *file)
+{
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+	local_set(&drvdata->reading, 0);
+
+	dev_dbg(&drvdata->csdev->dev, "%s: released\n", __func__);
+	return 0;
+}
+
+static const struct file_operations etb_fops = {
+	.owner		= THIS_MODULE,
+	.open		= etb_open,
+	.read		= etb_read,
+	.release	= etb_release,
+	.llseek		= no_llseek,
+};
+
+static struct attribute *coresight_etb_mgmt_attrs[] = {
+	coresight_simple_reg32(rdp, ETB_RAM_DEPTH_REG),
+	coresight_simple_reg32(sts, ETB_STATUS_REG),
+	coresight_simple_reg32(rrp, ETB_RAM_READ_POINTER),
+	coresight_simple_reg32(rwp, ETB_RAM_WRITE_POINTER),
+	coresight_simple_reg32(trg, ETB_TRG),
+	coresight_simple_reg32(ctl, ETB_CTL_REG),
+	coresight_simple_reg32(ffsr, ETB_FFSR),
+	coresight_simple_reg32(ffcr, ETB_FFCR),
+	NULL,
+};
+
+static ssize_t trigger_cntr_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->trigger_cntr;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_cntr_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->trigger_cntr = val;
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_cntr);
+
+static struct attribute *coresight_etb_attrs[] = {
+	&dev_attr_trigger_cntr.attr,
+	NULL,
+};
+
+static const struct attribute_group coresight_etb_group = {
+	.attrs = coresight_etb_attrs,
+};
+
+static const struct attribute_group coresight_etb_mgmt_group = {
+	.attrs = coresight_etb_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *coresight_etb_groups[] = {
+	&coresight_etb_group,
+	&coresight_etb_mgmt_group,
+	NULL,
+};
+
+static int etb_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct etb_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+
+	desc.name = coresight_alloc_device_name(&etb_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+	dev_set_drvdata(dev, drvdata);
+
+	/* validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->buffer_depth = etb_get_buffer_depth(drvdata);
+
+	if (drvdata->buffer_depth & 0x80000000)
+		return -EINVAL;
+
+	drvdata->buf = devm_kcalloc(dev,
+				    drvdata->buffer_depth, 4, GFP_KERNEL);
+	if (!drvdata->buf)
+		return -ENOMEM;
+
+	/* This device is not associated with a session */
+	drvdata->pid = -1;
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+	adev->dev.platform_data = pdata;
+
+	desc.type = CORESIGHT_DEV_TYPE_SINK;
+	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+	desc.ops = &etb_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etb_groups;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	drvdata->miscdev.name = desc.name;
+	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
+	drvdata->miscdev.fops = &etb_fops;
+	ret = misc_register(&drvdata->miscdev);
+	if (ret)
+		goto err_misc_register;
+
+	pm_runtime_put(&adev->dev);
+	return 0;
+
+err_misc_register:
+	coresight_unregister(drvdata->csdev);
+	return ret;
+}
+
+static void etb_remove(struct amba_device *adev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	/*
+	 * Since misc_open() holds a refcount on the f_ops, which is
+	 * etb fops in this case, device is there until last file
+	 * handler to this device is closed.
+	 */
+	misc_deregister(&drvdata->miscdev);
+	coresight_unregister(drvdata->csdev);
+}
+
+#ifdef CONFIG_PM
+static int etb_runtime_suspend(struct device *dev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int etb_runtime_resume(struct device *dev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops etb_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(etb_runtime_suspend, etb_runtime_resume, NULL)
+};
+
+static const struct amba_id etb_ids[] = {
+	{
+		.id	= 0x000bb907,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0},
+};
+
+MODULE_DEVICE_TABLE(amba, etb_ids);
+
+static struct amba_driver etb_driver = {
+	.drv = {
+		.name	= "coresight-etb10",
+		.owner	= THIS_MODULE,
+		.pm	= &etb_dev_pm_ops,
+		.suppress_bind_attrs = true,
+
+	},
+	.probe		= etb_probe,
+	.remove		= etb_remove,
+	.id_table	= etb_ids,
+};
+
+module_amba_driver(etb_driver);
+
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_AUTHOR("Mathieu Poirier <mathieu.poirier@linaro.org>");
+MODULE_DESCRIPTION("Arm CoreSight Embedded Trace Buffer driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-etm-cp14.c b/drivers/hwtracing/coresight/coresight-etm-cp14.c
new file mode 100644
index 0000000000..4174a8d355
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm-cp14.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <asm/hardware/cp14.h>
+
+#include "coresight-etm.h"
+
+int etm_readl_cp14(u32 reg, unsigned int *val)
+{
+	switch (reg) {
+	case ETMCR:
+		*val = etm_read(ETMCR);
+		return 0;
+	case ETMCCR:
+		*val = etm_read(ETMCCR);
+		return 0;
+	case ETMTRIGGER:
+		*val = etm_read(ETMTRIGGER);
+		return 0;
+	case ETMSR:
+		*val = etm_read(ETMSR);
+		return 0;
+	case ETMSCR:
+		*val = etm_read(ETMSCR);
+		return 0;
+	case ETMTSSCR:
+		*val = etm_read(ETMTSSCR);
+		return 0;
+	case ETMTEEVR:
+		*val = etm_read(ETMTEEVR);
+		return 0;
+	case ETMTECR1:
+		*val = etm_read(ETMTECR1);
+		return 0;
+	case ETMFFLR:
+		*val = etm_read(ETMFFLR);
+		return 0;
+	case ETMACVRn(0):
+		*val = etm_read(ETMACVR0);
+		return 0;
+	case ETMACVRn(1):
+		*val = etm_read(ETMACVR1);
+		return 0;
+	case ETMACVRn(2):
+		*val = etm_read(ETMACVR2);
+		return 0;
+	case ETMACVRn(3):
+		*val = etm_read(ETMACVR3);
+		return 0;
+	case ETMACVRn(4):
+		*val = etm_read(ETMACVR4);
+		return 0;
+	case ETMACVRn(5):
+		*val = etm_read(ETMACVR5);
+		return 0;
+	case ETMACVRn(6):
+		*val = etm_read(ETMACVR6);
+		return 0;
+	case ETMACVRn(7):
+		*val = etm_read(ETMACVR7);
+		return 0;
+	case ETMACVRn(8):
+		*val = etm_read(ETMACVR8);
+		return 0;
+	case ETMACVRn(9):
+		*val = etm_read(ETMACVR9);
+		return 0;
+	case ETMACVRn(10):
+		*val = etm_read(ETMACVR10);
+		return 0;
+	case ETMACVRn(11):
+		*val = etm_read(ETMACVR11);
+		return 0;
+	case ETMACVRn(12):
+		*val = etm_read(ETMACVR12);
+		return 0;
+	case ETMACVRn(13):
+		*val = etm_read(ETMACVR13);
+		return 0;
+	case ETMACVRn(14):
+		*val = etm_read(ETMACVR14);
+		return 0;
+	case ETMACVRn(15):
+		*val = etm_read(ETMACVR15);
+		return 0;
+	case ETMACTRn(0):
+		*val = etm_read(ETMACTR0);
+		return 0;
+	case ETMACTRn(1):
+		*val = etm_read(ETMACTR1);
+		return 0;
+	case ETMACTRn(2):
+		*val = etm_read(ETMACTR2);
+		return 0;
+	case ETMACTRn(3):
+		*val = etm_read(ETMACTR3);
+		return 0;
+	case ETMACTRn(4):
+		*val = etm_read(ETMACTR4);
+		return 0;
+	case ETMACTRn(5):
+		*val = etm_read(ETMACTR5);
+		return 0;
+	case ETMACTRn(6):
+		*val = etm_read(ETMACTR6);
+		return 0;
+	case ETMACTRn(7):
+		*val = etm_read(ETMACTR7);
+		return 0;
+	case ETMACTRn(8):
+		*val = etm_read(ETMACTR8);
+		return 0;
+	case ETMACTRn(9):
+		*val = etm_read(ETMACTR9);
+		return 0;
+	case ETMACTRn(10):
+		*val = etm_read(ETMACTR10);
+		return 0;
+	case ETMACTRn(11):
+		*val = etm_read(ETMACTR11);
+		return 0;
+	case ETMACTRn(12):
+		*val = etm_read(ETMACTR12);
+		return 0;
+	case ETMACTRn(13):
+		*val = etm_read(ETMACTR13);
+		return 0;
+	case ETMACTRn(14):
+		*val = etm_read(ETMACTR14);
+		return 0;
+	case ETMACTRn(15):
+		*val = etm_read(ETMACTR15);
+		return 0;
+	case ETMCNTRLDVRn(0):
+		*val = etm_read(ETMCNTRLDVR0);
+		return 0;
+	case ETMCNTRLDVRn(1):
+		*val = etm_read(ETMCNTRLDVR1);
+		return 0;
+	case ETMCNTRLDVRn(2):
+		*val = etm_read(ETMCNTRLDVR2);
+		return 0;
+	case ETMCNTRLDVRn(3):
+		*val = etm_read(ETMCNTRLDVR3);
+		return 0;
+	case ETMCNTENRn(0):
+		*val = etm_read(ETMCNTENR0);
+		return 0;
+	case ETMCNTENRn(1):
+		*val = etm_read(ETMCNTENR1);
+		return 0;
+	case ETMCNTENRn(2):
+		*val = etm_read(ETMCNTENR2);
+		return 0;
+	case ETMCNTENRn(3):
+		*val = etm_read(ETMCNTENR3);
+		return 0;
+	case ETMCNTRLDEVRn(0):
+		*val = etm_read(ETMCNTRLDEVR0);
+		return 0;
+	case ETMCNTRLDEVRn(1):
+		*val = etm_read(ETMCNTRLDEVR1);
+		return 0;
+	case ETMCNTRLDEVRn(2):
+		*val = etm_read(ETMCNTRLDEVR2);
+		return 0;
+	case ETMCNTRLDEVRn(3):
+		*val = etm_read(ETMCNTRLDEVR3);
+		return 0;
+	case ETMCNTVRn(0):
+		*val = etm_read(ETMCNTVR0);
+		return 0;
+	case ETMCNTVRn(1):
+		*val = etm_read(ETMCNTVR1);
+		return 0;
+	case ETMCNTVRn(2):
+		*val = etm_read(ETMCNTVR2);
+		return 0;
+	case ETMCNTVRn(3):
+		*val = etm_read(ETMCNTVR3);
+		return 0;
+	case ETMSQ12EVR:
+		*val = etm_read(ETMSQ12EVR);
+		return 0;
+	case ETMSQ21EVR:
+		*val = etm_read(ETMSQ21EVR);
+		return 0;
+	case ETMSQ23EVR:
+		*val = etm_read(ETMSQ23EVR);
+		return 0;
+	case ETMSQ31EVR:
+		*val = etm_read(ETMSQ31EVR);
+		return 0;
+	case ETMSQ32EVR:
+		*val = etm_read(ETMSQ32EVR);
+		return 0;
+	case ETMSQ13EVR:
+		*val = etm_read(ETMSQ13EVR);
+		return 0;
+	case ETMSQR:
+		*val = etm_read(ETMSQR);
+		return 0;
+	case ETMEXTOUTEVRn(0):
+		*val = etm_read(ETMEXTOUTEVR0);
+		return 0;
+	case ETMEXTOUTEVRn(1):
+		*val = etm_read(ETMEXTOUTEVR1);
+		return 0;
+	case ETMEXTOUTEVRn(2):
+		*val = etm_read(ETMEXTOUTEVR2);
+		return 0;
+	case ETMEXTOUTEVRn(3):
+		*val = etm_read(ETMEXTOUTEVR3);
+		return 0;
+	case ETMCIDCVRn(0):
+		*val = etm_read(ETMCIDCVR0);
+		return 0;
+	case ETMCIDCVRn(1):
+		*val = etm_read(ETMCIDCVR1);
+		return 0;
+	case ETMCIDCVRn(2):
+		*val = etm_read(ETMCIDCVR2);
+		return 0;
+	case ETMCIDCMR:
+		*val = etm_read(ETMCIDCMR);
+		return 0;
+	case ETMIMPSPEC0:
+		*val = etm_read(ETMIMPSPEC0);
+		return 0;
+	case ETMIMPSPEC1:
+		*val = etm_read(ETMIMPSPEC1);
+		return 0;
+	case ETMIMPSPEC2:
+		*val = etm_read(ETMIMPSPEC2);
+		return 0;
+	case ETMIMPSPEC3:
+		*val = etm_read(ETMIMPSPEC3);
+		return 0;
+	case ETMIMPSPEC4:
+		*val = etm_read(ETMIMPSPEC4);
+		return 0;
+	case ETMIMPSPEC5:
+		*val = etm_read(ETMIMPSPEC5);
+		return 0;
+	case ETMIMPSPEC6:
+		*val = etm_read(ETMIMPSPEC6);
+		return 0;
+	case ETMIMPSPEC7:
+		*val = etm_read(ETMIMPSPEC7);
+		return 0;
+	case ETMSYNCFR:
+		*val = etm_read(ETMSYNCFR);
+		return 0;
+	case ETMIDR:
+		*val = etm_read(ETMIDR);
+		return 0;
+	case ETMCCER:
+		*val = etm_read(ETMCCER);
+		return 0;
+	case ETMEXTINSELR:
+		*val = etm_read(ETMEXTINSELR);
+		return 0;
+	case ETMTESSEICR:
+		*val = etm_read(ETMTESSEICR);
+		return 0;
+	case ETMEIBCR:
+		*val = etm_read(ETMEIBCR);
+		return 0;
+	case ETMTSEVR:
+		*val = etm_read(ETMTSEVR);
+		return 0;
+	case ETMAUXCR:
+		*val = etm_read(ETMAUXCR);
+		return 0;
+	case ETMTRACEIDR:
+		*val = etm_read(ETMTRACEIDR);
+		return 0;
+	case ETMVMIDCVR:
+		*val = etm_read(ETMVMIDCVR);
+		return 0;
+	case ETMOSLSR:
+		*val = etm_read(ETMOSLSR);
+		return 0;
+	case ETMOSSRR:
+		*val = etm_read(ETMOSSRR);
+		return 0;
+	case ETMPDCR:
+		*val = etm_read(ETMPDCR);
+		return 0;
+	case ETMPDSR:
+		*val = etm_read(ETMPDSR);
+		return 0;
+	default:
+		*val = 0;
+		return -EINVAL;
+	}
+}
+
+int etm_writel_cp14(u32 reg, u32 val)
+{
+	switch (reg) {
+	case ETMCR:
+		etm_write(val, ETMCR);
+		break;
+	case ETMTRIGGER:
+		etm_write(val, ETMTRIGGER);
+		break;
+	case ETMSR:
+		etm_write(val, ETMSR);
+		break;
+	case ETMTSSCR:
+		etm_write(val, ETMTSSCR);
+		break;
+	case ETMTEEVR:
+		etm_write(val, ETMTEEVR);
+		break;
+	case ETMTECR1:
+		etm_write(val, ETMTECR1);
+		break;
+	case ETMFFLR:
+		etm_write(val, ETMFFLR);
+		break;
+	case ETMACVRn(0):
+		etm_write(val, ETMACVR0);
+		break;
+	case ETMACVRn(1):
+		etm_write(val, ETMACVR1);
+		break;
+	case ETMACVRn(2):
+		etm_write(val, ETMACVR2);
+		break;
+	case ETMACVRn(3):
+		etm_write(val, ETMACVR3);
+		break;
+	case ETMACVRn(4):
+		etm_write(val, ETMACVR4);
+		break;
+	case ETMACVRn(5):
+		etm_write(val, ETMACVR5);
+		break;
+	case ETMACVRn(6):
+		etm_write(val, ETMACVR6);
+		break;
+	case ETMACVRn(7):
+		etm_write(val, ETMACVR7);
+		break;
+	case ETMACVRn(8):
+		etm_write(val, ETMACVR8);
+		break;
+	case ETMACVRn(9):
+		etm_write(val, ETMACVR9);
+		break;
+	case ETMACVRn(10):
+		etm_write(val, ETMACVR10);
+		break;
+	case ETMACVRn(11):
+		etm_write(val, ETMACVR11);
+		break;
+	case ETMACVRn(12):
+		etm_write(val, ETMACVR12);
+		break;
+	case ETMACVRn(13):
+		etm_write(val, ETMACVR13);
+		break;
+	case ETMACVRn(14):
+		etm_write(val, ETMACVR14);
+		break;
+	case ETMACVRn(15):
+		etm_write(val, ETMACVR15);
+		break;
+	case ETMACTRn(0):
+		etm_write(val, ETMACTR0);
+		break;
+	case ETMACTRn(1):
+		etm_write(val, ETMACTR1);
+		break;
+	case ETMACTRn(2):
+		etm_write(val, ETMACTR2);
+		break;
+	case ETMACTRn(3):
+		etm_write(val, ETMACTR3);
+		break;
+	case ETMACTRn(4):
+		etm_write(val, ETMACTR4);
+		break;
+	case ETMACTRn(5):
+		etm_write(val, ETMACTR5);
+		break;
+	case ETMACTRn(6):
+		etm_write(val, ETMACTR6);
+		break;
+	case ETMACTRn(7):
+		etm_write(val, ETMACTR7);
+		break;
+	case ETMACTRn(8):
+		etm_write(val, ETMACTR8);
+		break;
+	case ETMACTRn(9):
+		etm_write(val, ETMACTR9);
+		break;
+	case ETMACTRn(10):
+		etm_write(val, ETMACTR10);
+		break;
+	case ETMACTRn(11):
+		etm_write(val, ETMACTR11);
+		break;
+	case ETMACTRn(12):
+		etm_write(val, ETMACTR12);
+		break;
+	case ETMACTRn(13):
+		etm_write(val, ETMACTR13);
+		break;
+	case ETMACTRn(14):
+		etm_write(val, ETMACTR14);
+		break;
+	case ETMACTRn(15):
+		etm_write(val, ETMACTR15);
+		break;
+	case ETMCNTRLDVRn(0):
+		etm_write(val, ETMCNTRLDVR0);
+		break;
+	case ETMCNTRLDVRn(1):
+		etm_write(val, ETMCNTRLDVR1);
+		break;
+	case ETMCNTRLDVRn(2):
+		etm_write(val, ETMCNTRLDVR2);
+		break;
+	case ETMCNTRLDVRn(3):
+		etm_write(val, ETMCNTRLDVR3);
+		break;
+	case ETMCNTENRn(0):
+		etm_write(val, ETMCNTENR0);
+		break;
+	case ETMCNTENRn(1):
+		etm_write(val, ETMCNTENR1);
+		break;
+	case ETMCNTENRn(2):
+		etm_write(val, ETMCNTENR2);
+		break;
+	case ETMCNTENRn(3):
+		etm_write(val, ETMCNTENR3);
+		break;
+	case ETMCNTRLDEVRn(0):
+		etm_write(val, ETMCNTRLDEVR0);
+		break;
+	case ETMCNTRLDEVRn(1):
+		etm_write(val, ETMCNTRLDEVR1);
+		break;
+	case ETMCNTRLDEVRn(2):
+		etm_write(val, ETMCNTRLDEVR2);
+		break;
+	case ETMCNTRLDEVRn(3):
+		etm_write(val, ETMCNTRLDEVR3);
+		break;
+	case ETMCNTVRn(0):
+		etm_write(val, ETMCNTVR0);
+		break;
+	case ETMCNTVRn(1):
+		etm_write(val, ETMCNTVR1);
+		break;
+	case ETMCNTVRn(2):
+		etm_write(val, ETMCNTVR2);
+		break;
+	case ETMCNTVRn(3):
+		etm_write(val, ETMCNTVR3);
+		break;
+	case ETMSQ12EVR:
+		etm_write(val, ETMSQ12EVR);
+		break;
+	case ETMSQ21EVR:
+		etm_write(val, ETMSQ21EVR);
+		break;
+	case ETMSQ23EVR:
+		etm_write(val, ETMSQ23EVR);
+		break;
+	case ETMSQ31EVR:
+		etm_write(val, ETMSQ31EVR);
+		break;
+	case ETMSQ32EVR:
+		etm_write(val, ETMSQ32EVR);
+		break;
+	case ETMSQ13EVR:
+		etm_write(val, ETMSQ13EVR);
+		break;
+	case ETMSQR:
+		etm_write(val, ETMSQR);
+		break;
+	case ETMEXTOUTEVRn(0):
+		etm_write(val, ETMEXTOUTEVR0);
+		break;
+	case ETMEXTOUTEVRn(1):
+		etm_write(val, ETMEXTOUTEVR1);
+		break;
+	case ETMEXTOUTEVRn(2):
+		etm_write(val, ETMEXTOUTEVR2);
+		break;
+	case ETMEXTOUTEVRn(3):
+		etm_write(val, ETMEXTOUTEVR3);
+		break;
+	case ETMCIDCVRn(0):
+		etm_write(val, ETMCIDCVR0);
+		break;
+	case ETMCIDCVRn(1):
+		etm_write(val, ETMCIDCVR1);
+		break;
+	case ETMCIDCVRn(2):
+		etm_write(val, ETMCIDCVR2);
+		break;
+	case ETMCIDCMR:
+		etm_write(val, ETMCIDCMR);
+		break;
+	case ETMIMPSPEC0:
+		etm_write(val, ETMIMPSPEC0);
+		break;
+	case ETMIMPSPEC1:
+		etm_write(val, ETMIMPSPEC1);
+		break;
+	case ETMIMPSPEC2:
+		etm_write(val, ETMIMPSPEC2);
+		break;
+	case ETMIMPSPEC3:
+		etm_write(val, ETMIMPSPEC3);
+		break;
+	case ETMIMPSPEC4:
+		etm_write(val, ETMIMPSPEC4);
+		break;
+	case ETMIMPSPEC5:
+		etm_write(val, ETMIMPSPEC5);
+		break;
+	case ETMIMPSPEC6:
+		etm_write(val, ETMIMPSPEC6);
+		break;
+	case ETMIMPSPEC7:
+		etm_write(val, ETMIMPSPEC7);
+		break;
+	case ETMSYNCFR:
+		etm_write(val, ETMSYNCFR);
+		break;
+	case ETMEXTINSELR:
+		etm_write(val, ETMEXTINSELR);
+		break;
+	case ETMTESSEICR:
+		etm_write(val, ETMTESSEICR);
+		break;
+	case ETMEIBCR:
+		etm_write(val, ETMEIBCR);
+		break;
+	case ETMTSEVR:
+		etm_write(val, ETMTSEVR);
+		break;
+	case ETMAUXCR:
+		etm_write(val, ETMAUXCR);
+		break;
+	case ETMTRACEIDR:
+		etm_write(val, ETMTRACEIDR);
+		break;
+	case ETMVMIDCVR:
+		etm_write(val, ETMVMIDCVR);
+		break;
+	case ETMOSLAR:
+		etm_write(val, ETMOSLAR);
+		break;
+	case ETMOSSRR:
+		etm_write(val, ETMOSSRR);
+		break;
+	case ETMPDCR:
+		etm_write(val, ETMPDCR);
+		break;
+	case ETMPDSR:
+		etm_write(val, ETMPDSR);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
new file mode 100644
index 0000000000..89e8ed214e
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -0,0 +1,917 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/coresight.h>
+#include <linux/coresight-pmu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/perf_event.h>
+#include <linux/percpu-defs.h>
+#include <linux/slab.h>
+#include <linux/stringhash.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#include "coresight-config.h"
+#include "coresight-etm-perf.h"
+#include "coresight-priv.h"
+#include "coresight-syscfg.h"
+#include "coresight-trace-id.h"
+
+static struct pmu etm_pmu;
+static bool etm_perf_up;
+
+/*
+ * An ETM context for a running event includes the perf aux handle
+ * and aux_data. For ETM, the aux_data (etm_event_data), consists of
+ * the trace path and the sink configuration. The event data is accessible
+ * via perf_get_aux(handle). However, a sink could "end" a perf output
+ * handle via the IRQ handler. And if the "sink" encounters a failure
+ * to "begin" another session (e.g due to lack of space in the buffer),
+ * the handle will be cleared. Thus, the event_data may not be accessible
+ * from the handle when we get to the etm_event_stop(), which is required
+ * for stopping the trace path. The event_data is guaranteed to stay alive
+ * until "free_aux()", which cannot happen as long as the event is active on
+ * the ETM. Thus the event_data for the session must be part of the ETM context
+ * to make sure we can disable the trace path.
+ */
+struct etm_ctxt {
+	struct perf_output_handle handle;
+	struct etm_event_data *event_data;
+};
+
+static DEFINE_PER_CPU(struct etm_ctxt, etm_ctxt);
+static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
+
+/*
+ * The PMU formats were orignally for ETMv3.5/PTM's ETMCR 'config';
+ * now take them as general formats and apply on all ETMs.
+ */
+PMU_FORMAT_ATTR(branch_broadcast, "config:"__stringify(ETM_OPT_BRANCH_BROADCAST));
+PMU_FORMAT_ATTR(cycacc,		"config:" __stringify(ETM_OPT_CYCACC));
+/* contextid1 enables tracing CONTEXTIDR_EL1 for ETMv4 */
+PMU_FORMAT_ATTR(contextid1,	"config:" __stringify(ETM_OPT_CTXTID));
+/* contextid2 enables tracing CONTEXTIDR_EL2 for ETMv4 */
+PMU_FORMAT_ATTR(contextid2,	"config:" __stringify(ETM_OPT_CTXTID2));
+PMU_FORMAT_ATTR(timestamp,	"config:" __stringify(ETM_OPT_TS));
+PMU_FORMAT_ATTR(retstack,	"config:" __stringify(ETM_OPT_RETSTK));
+/* preset - if sink ID is used as a configuration selector */
+PMU_FORMAT_ATTR(preset,		"config:0-3");
+/* Sink ID - same for all ETMs */
+PMU_FORMAT_ATTR(sinkid,		"config2:0-31");
+/* config ID - set if a system configuration is selected */
+PMU_FORMAT_ATTR(configid,	"config2:32-63");
+
+
+/*
+ * contextid always traces the "PID".  The PID is in CONTEXTIDR_EL1
+ * when the kernel is running at EL1; when the kernel is at EL2,
+ * the PID is in CONTEXTIDR_EL2.
+ */
+static ssize_t format_attr_contextid_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *page)
+{
+	int pid_fmt = ETM_OPT_CTXTID;
+
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+	pid_fmt = is_kernel_in_hyp_mode() ? ETM_OPT_CTXTID2 : ETM_OPT_CTXTID;
+#endif
+	return sprintf(page, "config:%d\n", pid_fmt);
+}
+
+static struct device_attribute format_attr_contextid =
+	__ATTR(contextid, 0444, format_attr_contextid_show, NULL);
+
+static struct attribute *etm_config_formats_attr[] = {
+	&format_attr_cycacc.attr,
+	&format_attr_contextid.attr,
+	&format_attr_contextid1.attr,
+	&format_attr_contextid2.attr,
+	&format_attr_timestamp.attr,
+	&format_attr_retstack.attr,
+	&format_attr_sinkid.attr,
+	&format_attr_preset.attr,
+	&format_attr_configid.attr,
+	&format_attr_branch_broadcast.attr,
+	NULL,
+};
+
+static const struct attribute_group etm_pmu_format_group = {
+	.name   = "format",
+	.attrs  = etm_config_formats_attr,
+};
+
+static struct attribute *etm_config_sinks_attr[] = {
+	NULL,
+};
+
+static const struct attribute_group etm_pmu_sinks_group = {
+	.name   = "sinks",
+	.attrs  = etm_config_sinks_attr,
+};
+
+static struct attribute *etm_config_events_attr[] = {
+	NULL,
+};
+
+static const struct attribute_group etm_pmu_events_group = {
+	.name   = "events",
+	.attrs  = etm_config_events_attr,
+};
+
+static const struct attribute_group *etm_pmu_attr_groups[] = {
+	&etm_pmu_format_group,
+	&etm_pmu_sinks_group,
+	&etm_pmu_events_group,
+	NULL,
+};
+
+static inline struct list_head **
+etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu)
+{
+	return per_cpu_ptr(data->path, cpu);
+}
+
+static inline struct list_head *
+etm_event_cpu_path(struct etm_event_data *data, int cpu)
+{
+	return *etm_event_cpu_path_ptr(data, cpu);
+}
+
+static void etm_event_read(struct perf_event *event) {}
+
+static int etm_addr_filters_alloc(struct perf_event *event)
+{
+	struct etm_filters *filters;
+	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+	filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node);
+	if (!filters)
+		return -ENOMEM;
+
+	if (event->parent)
+		memcpy(filters, event->parent->hw.addr_filters,
+		       sizeof(*filters));
+
+	event->hw.addr_filters = filters;
+
+	return 0;
+}
+
+static void etm_event_destroy(struct perf_event *event)
+{
+	kfree(event->hw.addr_filters);
+	event->hw.addr_filters = NULL;
+}
+
+static int etm_event_init(struct perf_event *event)
+{
+	int ret = 0;
+
+	if (event->attr.type != etm_pmu.type) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = etm_addr_filters_alloc(event);
+	if (ret)
+		goto out;
+
+	event->destroy = etm_event_destroy;
+out:
+	return ret;
+}
+
+static void free_sink_buffer(struct etm_event_data *event_data)
+{
+	int cpu;
+	cpumask_t *mask = &event_data->mask;
+	struct coresight_device *sink;
+
+	if (!event_data->snk_config)
+		return;
+
+	if (WARN_ON(cpumask_empty(mask)))
+		return;
+
+	cpu = cpumask_first(mask);
+	sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu));
+	sink_ops(sink)->free_buffer(event_data->snk_config);
+}
+
+static void free_event_data(struct work_struct *work)
+{
+	int cpu;
+	cpumask_t *mask;
+	struct etm_event_data *event_data;
+
+	event_data = container_of(work, struct etm_event_data, work);
+	mask = &event_data->mask;
+
+	/* Free the sink buffers, if there are any */
+	free_sink_buffer(event_data);
+
+	/* clear any configuration we were using */
+	if (event_data->cfg_hash)
+		cscfg_deactivate_config(event_data->cfg_hash);
+
+	for_each_cpu(cpu, mask) {
+		struct list_head **ppath;
+
+		ppath = etm_event_cpu_path_ptr(event_data, cpu);
+		if (!(IS_ERR_OR_NULL(*ppath)))
+			coresight_release_path(*ppath);
+		*ppath = NULL;
+		coresight_trace_id_put_cpu_id(cpu);
+	}
+
+	/* mark perf event as done for trace id allocator */
+	coresight_trace_id_perf_stop();
+
+	free_percpu(event_data->path);
+	kfree(event_data);
+}
+
+static void *alloc_event_data(int cpu)
+{
+	cpumask_t *mask;
+	struct etm_event_data *event_data;
+
+	/* First get memory for the session's data */
+	event_data = kzalloc(sizeof(struct etm_event_data), GFP_KERNEL);
+	if (!event_data)
+		return NULL;
+
+
+	mask = &event_data->mask;
+	if (cpu != -1)
+		cpumask_set_cpu(cpu, mask);
+	else
+		cpumask_copy(mask, cpu_present_mask);
+
+	/*
+	 * Each CPU has a single path between source and destination.  As such
+	 * allocate an array using CPU numbers as indexes.  That way a path
+	 * for any CPU can easily be accessed at any given time.  We proceed
+	 * the same way for sessions involving a single CPU.  The cost of
+	 * unused memory when dealing with single CPU trace scenarios is small
+	 * compared to the cost of searching through an optimized array.
+	 */
+	event_data->path = alloc_percpu(struct list_head *);
+
+	if (!event_data->path) {
+		kfree(event_data);
+		return NULL;
+	}
+
+	return event_data;
+}
+
+static void etm_free_aux(void *data)
+{
+	struct etm_event_data *event_data = data;
+
+	schedule_work(&event_data->work);
+}
+
+/*
+ * Check if two given sinks are compatible with each other,
+ * so that they can use the same sink buffers, when an event
+ * moves around.
+ */
+static bool sinks_compatible(struct coresight_device *a,
+			     struct coresight_device *b)
+{
+	if (!a || !b)
+		return false;
+	/*
+	 * If the sinks are of the same subtype and driven
+	 * by the same driver, we can use the same buffer
+	 * on these sinks.
+	 */
+	return (a->subtype.sink_subtype == b->subtype.sink_subtype) &&
+	       (sink_ops(a) == sink_ops(b));
+}
+
+static void *etm_setup_aux(struct perf_event *event, void **pages,
+			   int nr_pages, bool overwrite)
+{
+	u32 id, cfg_hash;
+	int cpu = event->cpu;
+	int trace_id;
+	cpumask_t *mask;
+	struct coresight_device *sink = NULL;
+	struct coresight_device *user_sink = NULL, *last_sink = NULL;
+	struct etm_event_data *event_data = NULL;
+
+	event_data = alloc_event_data(cpu);
+	if (!event_data)
+		return NULL;
+	INIT_WORK(&event_data->work, free_event_data);
+
+	/* First get the selected sink from user space. */
+	if (event->attr.config2 & GENMASK_ULL(31, 0)) {
+		id = (u32)event->attr.config2;
+		sink = user_sink = coresight_get_sink_by_id(id);
+	}
+
+	/* tell the trace ID allocator that a perf event is starting up */
+	coresight_trace_id_perf_start();
+
+	/* check if user wants a coresight configuration selected */
+	cfg_hash = (u32)((event->attr.config2 & GENMASK_ULL(63, 32)) >> 32);
+	if (cfg_hash) {
+		if (cscfg_activate_config(cfg_hash))
+			goto err;
+		event_data->cfg_hash = cfg_hash;
+	}
+
+	mask = &event_data->mask;
+
+	/*
+	 * Setup the path for each CPU in a trace session. We try to build
+	 * trace path for each CPU in the mask. If we don't find an ETM
+	 * for the CPU or fail to build a path, we clear the CPU from the
+	 * mask and continue with the rest. If ever we try to trace on those
+	 * CPUs, we can handle it and fail the session.
+	 */
+	for_each_cpu(cpu, mask) {
+		struct list_head *path;
+		struct coresight_device *csdev;
+
+		csdev = per_cpu(csdev_src, cpu);
+		/*
+		 * If there is no ETM associated with this CPU clear it from
+		 * the mask and continue with the rest. If ever we try to trace
+		 * on this CPU, we handle it accordingly.
+		 */
+		if (!csdev) {
+			cpumask_clear_cpu(cpu, mask);
+			continue;
+		}
+
+		/*
+		 * No sink provided - look for a default sink for all the ETMs,
+		 * where this event can be scheduled.
+		 * We allocate the sink specific buffers only once for this
+		 * event. If the ETMs have different default sink devices, we
+		 * can only use a single "type" of sink as the event can carry
+		 * only one sink specific buffer. Thus we have to make sure
+		 * that the sinks are of the same type and driven by the same
+		 * driver, as the one we allocate the buffer for. As such
+		 * we choose the first sink and check if the remaining ETMs
+		 * have a compatible default sink. We don't trace on a CPU
+		 * if the sink is not compatible.
+		 */
+		if (!user_sink) {
+			/* Find the default sink for this ETM */
+			sink = coresight_find_default_sink(csdev);
+			if (!sink) {
+				cpumask_clear_cpu(cpu, mask);
+				continue;
+			}
+
+			/* Check if this sink compatible with the last sink */
+			if (last_sink && !sinks_compatible(last_sink, sink)) {
+				cpumask_clear_cpu(cpu, mask);
+				continue;
+			}
+			last_sink = sink;
+		}
+
+		/*
+		 * Building a path doesn't enable it, it simply builds a
+		 * list of devices from source to sink that can be
+		 * referenced later when the path is actually needed.
+		 */
+		path = coresight_build_path(csdev, sink);
+		if (IS_ERR(path)) {
+			cpumask_clear_cpu(cpu, mask);
+			continue;
+		}
+
+		/* ensure we can allocate a trace ID for this CPU */
+		trace_id = coresight_trace_id_get_cpu_id(cpu);
+		if (!IS_VALID_CS_TRACE_ID(trace_id)) {
+			cpumask_clear_cpu(cpu, mask);
+			coresight_release_path(path);
+			continue;
+		}
+
+		*etm_event_cpu_path_ptr(event_data, cpu) = path;
+	}
+
+	/* no sink found for any CPU - cannot trace */
+	if (!sink)
+		goto err;
+
+	/* If we don't have any CPUs ready for tracing, abort */
+	cpu = cpumask_first(mask);
+	if (cpu >= nr_cpu_ids)
+		goto err;
+
+	if (!sink_ops(sink)->alloc_buffer || !sink_ops(sink)->free_buffer)
+		goto err;
+
+	/*
+	 * Allocate the sink buffer for this session. All the sinks
+	 * where this event can be scheduled are ensured to be of the
+	 * same type. Thus the same sink configuration is used by the
+	 * sinks.
+	 */
+	event_data->snk_config =
+			sink_ops(sink)->alloc_buffer(sink, event, pages,
+						     nr_pages, overwrite);
+	if (!event_data->snk_config)
+		goto err;
+
+out:
+	return event_data;
+
+err:
+	etm_free_aux(event_data);
+	event_data = NULL;
+	goto out;
+}
+
+static void etm_event_start(struct perf_event *event, int flags)
+{
+	int cpu = smp_processor_id();
+	struct etm_event_data *event_data;
+	struct etm_ctxt *ctxt = this_cpu_ptr(&etm_ctxt);
+	struct perf_output_handle *handle = &ctxt->handle;
+	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
+	struct list_head *path;
+	u64 hw_id;
+
+	if (!csdev)
+		goto fail;
+
+	/* Have we messed up our tracking ? */
+	if (WARN_ON(ctxt->event_data))
+		goto fail;
+
+	/*
+	 * Deal with the ring buffer API and get a handle on the
+	 * session's information.
+	 */
+	event_data = perf_aux_output_begin(handle, event);
+	if (!event_data)
+		goto fail;
+
+	/*
+	 * Check if this ETM is allowed to trace, as decided
+	 * at etm_setup_aux(). This could be due to an unreachable
+	 * sink from this ETM. We can't do much in this case if
+	 * the sink was specified or hinted to the driver. For
+	 * now, simply don't record anything on this ETM.
+	 *
+	 * As such we pretend that everything is fine, and let
+	 * it continue without actually tracing. The event could
+	 * continue tracing when it moves to a CPU where it is
+	 * reachable to a sink.
+	 */
+	if (!cpumask_test_cpu(cpu, &event_data->mask))
+		goto out;
+
+	path = etm_event_cpu_path(event_data, cpu);
+	/* We need a sink, no need to continue without one */
+	sink = coresight_get_sink(path);
+	if (WARN_ON_ONCE(!sink))
+		goto fail_end_stop;
+
+	/* Nothing will happen without a path */
+	if (coresight_enable_path(path, CS_MODE_PERF, handle))
+		goto fail_end_stop;
+
+	/* Finally enable the tracer */
+	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
+		goto fail_disable_path;
+
+	/*
+	 * output cpu / trace ID in perf record, once for the lifetime
+	 * of the event.
+	 */
+	if (!cpumask_test_cpu(cpu, &event_data->aux_hwid_done)) {
+		cpumask_set_cpu(cpu, &event_data->aux_hwid_done);
+		hw_id = FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK,
+				   CS_AUX_HW_ID_CURR_VERSION);
+		hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK,
+				    coresight_trace_id_read_cpu_id(cpu));
+		perf_report_aux_output_id(event, hw_id);
+	}
+
+out:
+	/* Tell the perf core the event is alive */
+	event->hw.state = 0;
+	/* Save the event_data for this ETM */
+	ctxt->event_data = event_data;
+	return;
+
+fail_disable_path:
+	coresight_disable_path(path);
+fail_end_stop:
+	/*
+	 * Check if the handle is still associated with the event,
+	 * to handle cases where if the sink failed to start the
+	 * trace and TRUNCATED the handle already.
+	 */
+	if (READ_ONCE(handle->event)) {
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+		perf_aux_output_end(handle, 0);
+	}
+fail:
+	event->hw.state = PERF_HES_STOPPED;
+	return;
+}
+
+static void etm_event_stop(struct perf_event *event, int mode)
+{
+	int cpu = smp_processor_id();
+	unsigned long size;
+	struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
+	struct etm_ctxt *ctxt = this_cpu_ptr(&etm_ctxt);
+	struct perf_output_handle *handle = &ctxt->handle;
+	struct etm_event_data *event_data;
+	struct list_head *path;
+
+	/*
+	 * If we still have access to the event_data via handle,
+	 * confirm that we haven't messed up the tracking.
+	 */
+	if (handle->event &&
+	    WARN_ON(perf_get_aux(handle) != ctxt->event_data))
+		return;
+
+	event_data = ctxt->event_data;
+	/* Clear the event_data as this ETM is stopping the trace. */
+	ctxt->event_data = NULL;
+
+	if (event->hw.state == PERF_HES_STOPPED)
+		return;
+
+	/* We must have a valid event_data for a running event */
+	if (WARN_ON(!event_data))
+		return;
+
+	/*
+	 * Check if this ETM was allowed to trace, as decided at
+	 * etm_setup_aux(). If it wasn't allowed to trace, then
+	 * nothing needs to be torn down other than outputting a
+	 * zero sized record.
+	 */
+	if (handle->event && (mode & PERF_EF_UPDATE) &&
+	    !cpumask_test_cpu(cpu, &event_data->mask)) {
+		event->hw.state = PERF_HES_STOPPED;
+		perf_aux_output_end(handle, 0);
+		return;
+	}
+
+	if (!csdev)
+		return;
+
+	path = etm_event_cpu_path(event_data, cpu);
+	if (!path)
+		return;
+
+	sink = coresight_get_sink(path);
+	if (!sink)
+		return;
+
+	/* stop tracer */
+	source_ops(csdev)->disable(csdev, event);
+
+	/* tell the core */
+	event->hw.state = PERF_HES_STOPPED;
+
+	/*
+	 * If the handle is not bound to an event anymore
+	 * (e.g, the sink driver was unable to restart the
+	 * handle due to lack of buffer space), we don't
+	 * have to do anything here.
+	 */
+	if (handle->event && (mode & PERF_EF_UPDATE)) {
+		if (WARN_ON_ONCE(handle->event != event))
+			return;
+
+		/* update trace information */
+		if (!sink_ops(sink)->update_buffer)
+			return;
+
+		size = sink_ops(sink)->update_buffer(sink, handle,
+					      event_data->snk_config);
+		/*
+		 * Make sure the handle is still valid as the
+		 * sink could have closed it from an IRQ.
+		 * The sink driver must handle the race with
+		 * update_buffer() and IRQ. Thus either we
+		 * should get a valid handle and valid size
+		 * (which may be 0).
+		 *
+		 * But we should never get a non-zero size with
+		 * an invalid handle.
+		 */
+		if (READ_ONCE(handle->event))
+			perf_aux_output_end(handle, size);
+		else
+			WARN_ON(size);
+	}
+
+	/* Disabling the path make its elements available to other sessions */
+	coresight_disable_path(path);
+}
+
+static int etm_event_add(struct perf_event *event, int mode)
+{
+	int ret = 0;
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (mode & PERF_EF_START) {
+		etm_event_start(event, 0);
+		if (hwc->state & PERF_HES_STOPPED)
+			ret = -EINVAL;
+	} else {
+		hwc->state = PERF_HES_STOPPED;
+	}
+
+	return ret;
+}
+
+static void etm_event_del(struct perf_event *event, int mode)
+{
+	etm_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int etm_addr_filters_validate(struct list_head *filters)
+{
+	bool range = false, address = false;
+	int index = 0;
+	struct perf_addr_filter *filter;
+
+	list_for_each_entry(filter, filters, entry) {
+		/*
+		 * No need to go further if there's no more
+		 * room for filters.
+		 */
+		if (++index > ETM_ADDR_CMP_MAX)
+			return -EOPNOTSUPP;
+
+		/* filter::size==0 means single address trigger */
+		if (filter->size) {
+			/*
+			 * The existing code relies on START/STOP filters
+			 * being address filters.
+			 */
+			if (filter->action == PERF_ADDR_FILTER_ACTION_START ||
+			    filter->action == PERF_ADDR_FILTER_ACTION_STOP)
+				return -EOPNOTSUPP;
+
+			range = true;
+		} else
+			address = true;
+
+		/*
+		 * At this time we don't allow range and start/stop filtering
+		 * to cohabitate, they have to be mutually exclusive.
+		 */
+		if (range && address)
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void etm_addr_filters_sync(struct perf_event *event)
+{
+	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+	unsigned long start, stop;
+	struct perf_addr_filter_range *fr = event->addr_filter_ranges;
+	struct etm_filters *filters = event->hw.addr_filters;
+	struct etm_filter *etm_filter;
+	struct perf_addr_filter *filter;
+	int i = 0;
+
+	list_for_each_entry(filter, &head->list, entry) {
+		start = fr[i].start;
+		stop = start + fr[i].size;
+		etm_filter = &filters->etm_filter[i];
+
+		switch (filter->action) {
+		case PERF_ADDR_FILTER_ACTION_FILTER:
+			etm_filter->start_addr = start;
+			etm_filter->stop_addr = stop;
+			etm_filter->type = ETM_ADDR_TYPE_RANGE;
+			break;
+		case PERF_ADDR_FILTER_ACTION_START:
+			etm_filter->start_addr = start;
+			etm_filter->type = ETM_ADDR_TYPE_START;
+			break;
+		case PERF_ADDR_FILTER_ACTION_STOP:
+			etm_filter->stop_addr = stop;
+			etm_filter->type = ETM_ADDR_TYPE_STOP;
+			break;
+		}
+		i++;
+	}
+
+	filters->nr_filters = i;
+}
+
+int etm_perf_symlink(struct coresight_device *csdev, bool link)
+{
+	char entry[sizeof("cpu9999999")];
+	int ret = 0, cpu = source_ops(csdev)->cpu_id(csdev);
+	struct device *pmu_dev = etm_pmu.dev;
+	struct device *cs_dev = &csdev->dev;
+
+	sprintf(entry, "cpu%d", cpu);
+
+	if (!etm_perf_up)
+		return -EPROBE_DEFER;
+
+	if (link) {
+		ret = sysfs_create_link(&pmu_dev->kobj, &cs_dev->kobj, entry);
+		if (ret)
+			return ret;
+		per_cpu(csdev_src, cpu) = csdev;
+	} else {
+		sysfs_remove_link(&pmu_dev->kobj, entry);
+		per_cpu(csdev_src, cpu) = NULL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(etm_perf_symlink);
+
+static ssize_t etm_perf_sink_name_show(struct device *dev,
+				       struct device_attribute *dattr,
+				       char *buf)
+{
+	struct dev_ext_attribute *ea;
+
+	ea = container_of(dattr, struct dev_ext_attribute, attr);
+	return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var));
+}
+
+static struct dev_ext_attribute *
+etm_perf_add_symlink_group(struct device *dev, const char *name, const char *group_name)
+{
+	struct dev_ext_attribute *ea;
+	unsigned long hash;
+	int ret;
+	struct device *pmu_dev = etm_pmu.dev;
+
+	if (!etm_perf_up)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	ea = devm_kzalloc(dev, sizeof(*ea), GFP_KERNEL);
+	if (!ea)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * If this function is called adding a sink then the hash is used for
+	 * sink selection - see function coresight_get_sink_by_id().
+	 * If adding a configuration then the hash is used for selection in
+	 * cscfg_activate_config()
+	 */
+	hash = hashlen_hash(hashlen_string(NULL, name));
+
+	sysfs_attr_init(&ea->attr.attr);
+	ea->attr.attr.name = devm_kstrdup(dev, name, GFP_KERNEL);
+	if (!ea->attr.attr.name)
+		return ERR_PTR(-ENOMEM);
+
+	ea->attr.attr.mode = 0444;
+	ea->var = (unsigned long *)hash;
+
+	ret = sysfs_add_file_to_group(&pmu_dev->kobj,
+				      &ea->attr.attr, group_name);
+
+	return ret ? ERR_PTR(ret) : ea;
+}
+
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{
+	const char *name;
+	struct device *dev = &csdev->dev;
+	int err = 0;
+
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return -EINVAL;
+
+	if (csdev->ea != NULL)
+		return -EINVAL;
+
+	name = dev_name(dev);
+	csdev->ea = etm_perf_add_symlink_group(dev, name, "sinks");
+	if (IS_ERR(csdev->ea)) {
+		err = PTR_ERR(csdev->ea);
+		csdev->ea = NULL;
+	} else
+		csdev->ea->attr.show = etm_perf_sink_name_show;
+
+	return err;
+}
+
+static void etm_perf_del_symlink_group(struct dev_ext_attribute *ea, const char *group_name)
+{
+	struct device *pmu_dev = etm_pmu.dev;
+
+	sysfs_remove_file_from_group(&pmu_dev->kobj,
+				     &ea->attr.attr, group_name);
+}
+
+void etm_perf_del_symlink_sink(struct coresight_device *csdev)
+{
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return;
+
+	if (!csdev->ea)
+		return;
+
+	etm_perf_del_symlink_group(csdev->ea, "sinks");
+	csdev->ea = NULL;
+}
+
+static ssize_t etm_perf_cscfg_event_show(struct device *dev,
+					 struct device_attribute *dattr,
+					 char *buf)
+{
+	struct dev_ext_attribute *ea;
+
+	ea = container_of(dattr, struct dev_ext_attribute, attr);
+	return scnprintf(buf, PAGE_SIZE, "configid=0x%lx\n", (unsigned long)(ea->var));
+}
+
+int etm_perf_add_symlink_cscfg(struct device *dev, struct cscfg_config_desc *config_desc)
+{
+	int err = 0;
+
+	if (config_desc->event_ea != NULL)
+		return 0;
+
+	config_desc->event_ea = etm_perf_add_symlink_group(dev, config_desc->name, "events");
+
+	/* set the show function to the custom cscfg event */
+	if (!IS_ERR(config_desc->event_ea))
+		config_desc->event_ea->attr.show = etm_perf_cscfg_event_show;
+	else {
+		err = PTR_ERR(config_desc->event_ea);
+		config_desc->event_ea = NULL;
+	}
+
+	return err;
+}
+
+void etm_perf_del_symlink_cscfg(struct cscfg_config_desc *config_desc)
+{
+	if (!config_desc->event_ea)
+		return;
+
+	etm_perf_del_symlink_group(config_desc->event_ea, "events");
+	config_desc->event_ea = NULL;
+}
+
+int __init etm_perf_init(void)
+{
+	int ret;
+
+	etm_pmu.capabilities		= (PERF_PMU_CAP_EXCLUSIVE |
+					   PERF_PMU_CAP_ITRACE);
+
+	etm_pmu.attr_groups		= etm_pmu_attr_groups;
+	etm_pmu.task_ctx_nr		= perf_sw_context;
+	etm_pmu.read			= etm_event_read;
+	etm_pmu.event_init		= etm_event_init;
+	etm_pmu.setup_aux		= etm_setup_aux;
+	etm_pmu.free_aux		= etm_free_aux;
+	etm_pmu.start			= etm_event_start;
+	etm_pmu.stop			= etm_event_stop;
+	etm_pmu.add			= etm_event_add;
+	etm_pmu.del			= etm_event_del;
+	etm_pmu.addr_filters_sync	= etm_addr_filters_sync;
+	etm_pmu.addr_filters_validate	= etm_addr_filters_validate;
+	etm_pmu.nr_addr_filters		= ETM_ADDR_CMP_MAX;
+	etm_pmu.module			= THIS_MODULE;
+
+	ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
+	if (ret == 0)
+		etm_perf_up = true;
+
+	return ret;
+}
+
+void etm_perf_exit(void)
+{
+	perf_pmu_unregister(&etm_pmu);
+}
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
new file mode 100644
index 0000000000..bebbadee2c
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef _CORESIGHT_ETM_PERF_H
+#define _CORESIGHT_ETM_PERF_H
+
+#include <linux/percpu-defs.h>
+#include "coresight-priv.h"
+
+struct coresight_device;
+struct cscfg_config_desc;
+
+/*
+ * In both ETMv3 and v4 the maximum number of address comparator implentable
+ * is 8.  The actual number is implementation specific and will be checked
+ * when filters are applied.
+ */
+#define ETM_ADDR_CMP_MAX	8
+
+/**
+ * struct etm_filter - single instruction range or start/stop configuration.
+ * @start_addr:	The address to start tracing on.
+ * @stop_addr:	The address to stop tracing on.
+ * @type:	Is this a range or start/stop filter.
+ */
+struct etm_filter {
+	unsigned long start_addr;
+	unsigned long stop_addr;
+	enum etm_addr_type type;
+};
+
+/**
+ * struct etm_filters - set of filters for a session
+ * @etm_filter:	All the filters for this session.
+ * @nr_filters:	Number of filters
+ * @ssstatus:	Status of the start/stop logic.
+ */
+struct etm_filters {
+	struct etm_filter	etm_filter[ETM_ADDR_CMP_MAX];
+	unsigned int		nr_filters;
+	bool			ssstatus;
+};
+
+/**
+ * struct etm_event_data - Coresight specifics associated to an event
+ * @work:		Handle to free allocated memory outside IRQ context.
+ * @mask:		Hold the CPU(s) this event was set for.
+ * @aux_hwid_done:	Whether a CPU has emitted the TraceID packet or not.
+ * @snk_config:		The sink configuration.
+ * @cfg_hash:		The hash id of any coresight config selected.
+ * @path:		An array of path, each slot for one CPU.
+ */
+struct etm_event_data {
+	struct work_struct work;
+	cpumask_t mask;
+	cpumask_t aux_hwid_done;
+	void *snk_config;
+	u32 cfg_hash;
+	struct list_head * __percpu *path;
+};
+
+#if IS_ENABLED(CONFIG_CORESIGHT)
+int etm_perf_symlink(struct coresight_device *csdev, bool link);
+int etm_perf_add_symlink_sink(struct coresight_device *csdev);
+void etm_perf_del_symlink_sink(struct coresight_device *csdev);
+static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
+{
+	struct etm_event_data *data = perf_get_aux(handle);
+
+	if (data)
+		return data->snk_config;
+	return NULL;
+}
+int etm_perf_add_symlink_cscfg(struct device *dev,
+			       struct cscfg_config_desc *config_desc);
+void etm_perf_del_symlink_cscfg(struct cscfg_config_desc *config_desc);
+#else
+static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
+{ return -EINVAL; }
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{ return -EINVAL; }
+void etm_perf_del_symlink_sink(struct coresight_device *csdev) {}
+static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
+{
+	return NULL;
+}
+int etm_perf_add_symlink_cscfg(struct device *dev,
+			       struct cscfg_config_desc *config_desc)
+{ return -EINVAL; }
+void etm_perf_del_symlink_cscfg(struct cscfg_config_desc *config_desc) {}
+
+#endif /* CONFIG_CORESIGHT */
+
+int __init etm_perf_init(void);
+void etm_perf_exit(void);
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h
new file mode 100644
index 0000000000..9a0d08b092
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm.h
@@ -0,0 +1,291 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_ETM_H
+#define _CORESIGHT_CORESIGHT_ETM_H
+
+#include <asm/local.h>
+#include <linux/spinlock.h>
+#include "coresight-priv.h"
+
+/*
+ * Device registers:
+ * 0x000 - 0x2FC: Trace         registers
+ * 0x300 - 0x314: Management    registers
+ * 0x318 - 0xEFC: Trace         registers
+ *
+ * Coresight registers
+ * 0xF00 - 0xF9C: Management    registers
+ * 0xFA0 - 0xFA4: Management    registers in PFTv1.0
+ *                Trace         registers in PFTv1.1
+ * 0xFA8 - 0xFFC: Management    registers
+ */
+
+/* Trace registers (0x000-0x2FC) */
+#define ETMCR			0x000
+#define ETMCCR			0x004
+#define ETMTRIGGER		0x008
+#define ETMSR			0x010
+#define ETMSCR			0x014
+#define ETMTSSCR		0x018
+#define ETMTECR2		0x01c
+#define ETMTEEVR		0x020
+#define ETMTECR1		0x024
+#define ETMFFLR			0x02c
+#define ETMACVRn(n)		(0x040 + (n * 4))
+#define ETMACTRn(n)		(0x080 + (n * 4))
+#define ETMCNTRLDVRn(n)		(0x140 + (n * 4))
+#define ETMCNTENRn(n)		(0x150 + (n * 4))
+#define ETMCNTRLDEVRn(n)	(0x160 + (n * 4))
+#define ETMCNTVRn(n)		(0x170 + (n * 4))
+#define ETMSQ12EVR		0x180
+#define ETMSQ21EVR		0x184
+#define ETMSQ23EVR		0x188
+#define ETMSQ31EVR		0x18c
+#define ETMSQ32EVR		0x190
+#define ETMSQ13EVR		0x194
+#define ETMSQR			0x19c
+#define ETMEXTOUTEVRn(n)	(0x1a0 + (n * 4))
+#define ETMCIDCVRn(n)		(0x1b0 + (n * 4))
+#define ETMCIDCMR		0x1bc
+#define ETMIMPSPEC0		0x1c0
+#define ETMIMPSPEC1		0x1c4
+#define ETMIMPSPEC2		0x1c8
+#define ETMIMPSPEC3		0x1cc
+#define ETMIMPSPEC4		0x1d0
+#define ETMIMPSPEC5		0x1d4
+#define ETMIMPSPEC6		0x1d8
+#define ETMIMPSPEC7		0x1dc
+#define ETMSYNCFR		0x1e0
+#define ETMIDR			0x1e4
+#define ETMCCER			0x1e8
+#define ETMEXTINSELR		0x1ec
+#define ETMTESSEICR		0x1f0
+#define ETMEIBCR		0x1f4
+#define ETMTSEVR		0x1f8
+#define ETMAUXCR		0x1fc
+#define ETMTRACEIDR		0x200
+#define ETMVMIDCVR		0x240
+/* Management registers (0x300-0x314) */
+#define ETMOSLAR		0x300
+#define ETMOSLSR		0x304
+#define ETMOSSRR		0x308
+#define ETMPDCR			0x310
+#define ETMPDSR			0x314
+#define ETM_MAX_ADDR_CMP	16
+#define ETM_MAX_CNTR		4
+#define ETM_MAX_CTXID_CMP	3
+
+/* Register definition */
+/* ETMCR - 0x00 */
+#define ETMCR_PWD_DWN		BIT(0)
+#define ETMCR_STALL_MODE	BIT(7)
+#define ETMCR_BRANCH_BROADCAST	BIT(8)
+#define ETMCR_ETM_PRG		BIT(10)
+#define ETMCR_ETM_EN		BIT(11)
+#define ETMCR_CYC_ACC		BIT(12)
+#define ETMCR_CTXID_SIZE	(BIT(14)|BIT(15))
+#define ETMCR_TIMESTAMP_EN	BIT(28)
+#define ETMCR_RETURN_STACK	BIT(29)
+/* ETMCCR - 0x04 */
+#define ETMCCR_FIFOFULL		BIT(23)
+/* ETMPDCR - 0x310 */
+#define ETMPDCR_PWD_UP		BIT(3)
+/* ETMTECR1 - 0x024 */
+#define ETMTECR1_ADDR_COMP_1	BIT(0)
+#define ETMTECR1_INC_EXC	BIT(24)
+#define ETMTECR1_START_STOP	BIT(25)
+/* ETMCCER - 0x1E8 */
+#define ETMCCER_TIMESTAMP	BIT(22)
+#define ETMCCER_RETSTACK	BIT(23)
+
+#define ETM_MODE_EXCLUDE	BIT(0)
+#define ETM_MODE_CYCACC		BIT(1)
+#define ETM_MODE_STALL		BIT(2)
+#define ETM_MODE_TIMESTAMP	BIT(3)
+#define ETM_MODE_CTXID		BIT(4)
+#define ETM_MODE_BBROAD		BIT(5)
+#define ETM_MODE_RET_STACK	BIT(6)
+#define ETM_MODE_ALL		(ETM_MODE_EXCLUDE | ETM_MODE_CYCACC | \
+				 ETM_MODE_STALL | ETM_MODE_TIMESTAMP | \
+				 ETM_MODE_BBROAD | ETM_MODE_RET_STACK | \
+				 ETM_MODE_CTXID | ETM_MODE_EXCL_KERN | \
+				 ETM_MODE_EXCL_USER)
+
+#define ETM_SQR_MASK		0x3
+#define ETM_TRACEID_MASK	0x3f
+#define ETM_EVENT_MASK		0x1ffff
+#define ETM_SYNC_MASK		0xfff
+#define ETM_ALL_MASK		0xffffffff
+
+#define ETMSR_PROG_BIT		1
+#define ETM_SEQ_STATE_MAX_VAL	(0x2)
+#define PORT_SIZE_MASK		(GENMASK(21, 21) | GENMASK(6, 4))
+
+#define ETM_HARD_WIRE_RES_A	/* Hard wired, always true */	\
+				((0x0f << 0)	|		\
+				/* Resource index A */		\
+				(0x06 << 4))
+
+#define ETM_ADD_COMP_0		/* Single addr comparator 1 */	\
+				((0x00 << 7)	|		\
+				/* Resource index B */		\
+				(0x00 << 11))
+
+#define ETM_EVENT_NOT_A		BIT(14) /* NOT(A) */
+
+#define ETM_DEFAULT_EVENT_VAL	(ETM_HARD_WIRE_RES_A	|	\
+				 ETM_ADD_COMP_0		|	\
+				 ETM_EVENT_NOT_A)
+
+/**
+ * struct etm_config - configuration information related to an ETM
+ * @mode:	controls various modes supported by this ETM/PTM.
+ * @ctrl:	used in conjunction with @mode.
+ * @trigger_event: setting for register ETMTRIGGER.
+ * @startstop_ctrl: setting for register ETMTSSCR.
+ * @enable_event: setting for register ETMTEEVR.
+ * @enable_ctrl1: setting for register ETMTECR1.
+ * @enable_ctrl2: setting for register ETMTECR2.
+ * @fifofull_level: setting for register ETMFFLR.
+ * @addr_idx:	index for the address comparator selection.
+ * @addr_val:	value for address comparator register.
+ * @addr_acctype: access type for address comparator register.
+ * @addr_type:	current status of the comparator register.
+ * @cntr_idx:	index for the counter register selection.
+ * @cntr_rld_val: reload value of a counter register.
+ * @cntr_event:	control for counter enable register.
+ * @cntr_rld_event: value for counter reload event register.
+ * @cntr_val:	counter value register.
+ * @seq_12_event: event causing the transition from 1 to 2.
+ * @seq_21_event: event causing the transition from 2 to 1.
+ * @seq_23_event: event causing the transition from 2 to 3.
+ * @seq_31_event: event causing the transition from 3 to 1.
+ * @seq_32_event: event causing the transition from 3 to 2.
+ * @seq_13_event: event causing the transition from 1 to 3.
+ * @seq_curr_state: current value of the sequencer register.
+ * @ctxid_idx: index for the context ID registers.
+ * @ctxid_pid: value for the context ID to trigger on.
+ * @ctxid_mask: mask applicable to all the context IDs.
+ * @sync_freq:	Synchronisation frequency.
+ * @timestamp_event: Defines an event that requests the insertion
+ *		     of a timestamp into the trace stream.
+ */
+struct etm_config {
+	u32				mode;
+	u32				ctrl;
+	u32				trigger_event;
+	u32				startstop_ctrl;
+	u32				enable_event;
+	u32				enable_ctrl1;
+	u32				enable_ctrl2;
+	u32				fifofull_level;
+	u8				addr_idx;
+	u32				addr_val[ETM_MAX_ADDR_CMP];
+	u32				addr_acctype[ETM_MAX_ADDR_CMP];
+	u32				addr_type[ETM_MAX_ADDR_CMP];
+	u8				cntr_idx;
+	u32				cntr_rld_val[ETM_MAX_CNTR];
+	u32				cntr_event[ETM_MAX_CNTR];
+	u32				cntr_rld_event[ETM_MAX_CNTR];
+	u32				cntr_val[ETM_MAX_CNTR];
+	u32				seq_12_event;
+	u32				seq_21_event;
+	u32				seq_23_event;
+	u32				seq_31_event;
+	u32				seq_32_event;
+	u32				seq_13_event;
+	u32				seq_curr_state;
+	u8				ctxid_idx;
+	u32				ctxid_pid[ETM_MAX_CTXID_CMP];
+	u32				ctxid_mask;
+	u32				sync_freq;
+	u32				timestamp_event;
+};
+
+/**
+ * struct etm_drvdata - specifics associated to an ETM component
+ * @base:	memory mapped base address for this component.
+ * @atclk:	optional clock for the core parts of the ETM.
+ * @csdev:	component vitals needed by the framework.
+ * @spinlock:	only one at a time pls.
+ * @cpu:	the cpu this component is affined to.
+ * @port_size:	port size as reported by ETMCR bit 4-6 and 21.
+ * @arch:	ETM/PTM version number.
+ * @use_cpu14:	true if management registers need to be accessed via CP14.
+ * @mode:	this tracer's mode, i.e sysFS, Perf or disabled.
+ * @sticky_enable: true if ETM base configuration has been done.
+ * @boot_enable:true if we should start tracing at boot time.
+ * @os_unlock:	true if access to management registers is allowed.
+ * @nr_addr_cmp:Number of pairs of address comparators as found in ETMCCR.
+ * @nr_cntr:	Number of counters as found in ETMCCR bit 13-15.
+ * @nr_ext_inp:	Number of external input as found in ETMCCR bit 17-19.
+ * @nr_ext_out:	Number of external output as found in ETMCCR bit 20-22.
+ * @nr_ctxid_cmp: Number of contextID comparators as found in ETMCCR bit 24-25.
+ * @etmccr:	value of register ETMCCR.
+ * @etmccer:	value of register ETMCCER.
+ * @traceid:	value of the current ID for this component.
+ * @config:	structure holding configuration parameters.
+ */
+struct etm_drvdata {
+	void __iomem			*base;
+	struct clk			*atclk;
+	struct coresight_device		*csdev;
+	spinlock_t			spinlock;
+	int				cpu;
+	int				port_size;
+	u8				arch;
+	bool				use_cp14;
+	local_t				mode;
+	bool				sticky_enable;
+	bool				boot_enable;
+	bool				os_unlock;
+	u8				nr_addr_cmp;
+	u8				nr_cntr;
+	u8				nr_ext_inp;
+	u8				nr_ext_out;
+	u8				nr_ctxid_cmp;
+	u32				etmccr;
+	u32				etmccer;
+	u32				traceid;
+	struct etm_config		config;
+};
+
+static inline void etm_writel(struct etm_drvdata *drvdata,
+			      u32 val, u32 off)
+{
+	if (drvdata->use_cp14) {
+		if (etm_writel_cp14(off, val)) {
+			dev_err(&drvdata->csdev->dev,
+				"invalid CP14 access to ETM reg: %#x", off);
+		}
+	} else {
+		writel_relaxed(val, drvdata->base + off);
+	}
+}
+
+static inline unsigned int etm_readl(struct etm_drvdata *drvdata, u32 off)
+{
+	u32 val;
+
+	if (drvdata->use_cp14) {
+		if (etm_readl_cp14(off, &val)) {
+			dev_err(&drvdata->csdev->dev,
+				"invalid CP14 access to ETM reg: %#x", off);
+		}
+	} else {
+		val = readl_relaxed(drvdata->base + off);
+	}
+
+	return val;
+}
+
+extern const struct attribute_group *coresight_etm_groups[];
+void etm_set_default(struct etm_config *config);
+void etm_config_trace_mode(struct etm_config *config);
+struct etm_config *get_etm_config(struct etm_drvdata *drvdata);
+int etm_read_alloc_trace_id(struct etm_drvdata *drvdata);
+void etm_release_trace_id(struct etm_drvdata *drvdata);
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c
new file mode 100644
index 0000000000..116a91d90a
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c
@@ -0,0 +1,1054 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Program Flow Trace driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/stat.h>
+#include <linux/pm_runtime.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+#include <linux/coresight-pmu.h>
+#include <linux/amba/bus.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/clk.h>
+#include <linux/perf_event.h>
+#include <asm/sections.h>
+
+#include "coresight-etm.h"
+#include "coresight-etm-perf.h"
+#include "coresight-trace-id.h"
+
+/*
+ * Not really modular but using module_param is the easiest way to
+ * remain consistent with existing use cases for now.
+ */
+static int boot_enable;
+module_param_named(boot_enable, boot_enable, int, S_IRUGO);
+
+static struct etm_drvdata *etmdrvdata[NR_CPUS];
+
+static enum cpuhp_state hp_online;
+
+/*
+ * Memory mapped writes to clear os lock are not supported on some processors
+ * and OS lock must be unlocked before any memory mapped access on such
+ * processors, otherwise memory mapped reads/writes will be invalid.
+ */
+static void etm_os_unlock(struct etm_drvdata *drvdata)
+{
+	/* Writing any value to ETMOSLAR unlocks the trace registers */
+	etm_writel(drvdata, 0x0, ETMOSLAR);
+	drvdata->os_unlock = true;
+	isb();
+}
+
+static void etm_set_pwrdwn(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	/* Ensure pending cp14 accesses complete before setting pwrdwn */
+	mb();
+	isb();
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr |= ETMCR_PWD_DWN;
+	etm_writel(drvdata, etmcr, ETMCR);
+}
+
+static void etm_clr_pwrdwn(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr &= ~ETMCR_PWD_DWN;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/* Ensure pwrup completes before subsequent cp14 accesses */
+	mb();
+	isb();
+}
+
+static void etm_set_pwrup(struct etm_drvdata *drvdata)
+{
+	u32 etmpdcr;
+
+	etmpdcr = readl_relaxed(drvdata->base + ETMPDCR);
+	etmpdcr |= ETMPDCR_PWD_UP;
+	writel_relaxed(etmpdcr, drvdata->base + ETMPDCR);
+	/* Ensure pwrup completes before subsequent cp14 accesses */
+	mb();
+	isb();
+}
+
+static void etm_clr_pwrup(struct etm_drvdata *drvdata)
+{
+	u32 etmpdcr;
+
+	/* Ensure pending cp14 accesses complete before clearing pwrup */
+	mb();
+	isb();
+	etmpdcr = readl_relaxed(drvdata->base + ETMPDCR);
+	etmpdcr &= ~ETMPDCR_PWD_UP;
+	writel_relaxed(etmpdcr, drvdata->base + ETMPDCR);
+}
+
+/**
+ * coresight_timeout_etm - loop until a bit has changed to a specific state.
+ * @drvdata: etm's private data structure.
+ * @offset: address of a register, starting from @addr.
+ * @position: the position of the bit of interest.
+ * @value: the value the bit should have.
+ *
+ * Basically the same as @coresight_timeout except for the register access
+ * method where we have to account for CP14 configurations.
+
+ * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
+ * TIMEOUT_US has elapsed, which ever happens first.
+ */
+
+static int coresight_timeout_etm(struct etm_drvdata *drvdata, u32 offset,
+				  int position, int value)
+{
+	int i;
+	u32 val;
+
+	for (i = TIMEOUT_US; i > 0; i--) {
+		val = etm_readl(drvdata, offset);
+		/* Waiting on the bit to go from 0 to 1 */
+		if (value) {
+			if (val & BIT(position))
+				return 0;
+		/* Waiting on the bit to go from 1 to 0 */
+		} else {
+			if (!(val & BIT(position)))
+				return 0;
+		}
+
+		/*
+		 * Delay is arbitrary - the specification doesn't say how long
+		 * we are expected to wait.  Extra check required to make sure
+		 * we don't wait needlessly on the last iteration.
+		 */
+		if (i - 1)
+			udelay(1);
+	}
+
+	return -EAGAIN;
+}
+
+
+static void etm_set_prog(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr |= ETMCR_ETM_PRG;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/*
+	 * Recommended by spec for cp14 accesses to ensure etmcr write is
+	 * complete before polling etmsr
+	 */
+	isb();
+	if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 1)) {
+		dev_err(&drvdata->csdev->dev,
+			"%s: timeout observed when probing at offset %#x\n",
+			__func__, ETMSR);
+	}
+}
+
+static void etm_clr_prog(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr &= ~ETMCR_ETM_PRG;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/*
+	 * Recommended by spec for cp14 accesses to ensure etmcr write is
+	 * complete before polling etmsr
+	 */
+	isb();
+	if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 0)) {
+		dev_err(&drvdata->csdev->dev,
+			"%s: timeout observed when probing at offset %#x\n",
+			__func__, ETMSR);
+	}
+}
+
+void etm_set_default(struct etm_config *config)
+{
+	int i;
+
+	if (WARN_ON_ONCE(!config))
+		return;
+
+	/*
+	 * Taken verbatim from the TRM:
+	 *
+	 * To trace all memory:
+	 *  set bit [24] in register 0x009, the ETMTECR1, to 1
+	 *  set all other bits in register 0x009, the ETMTECR1, to 0
+	 *  set all bits in register 0x007, the ETMTECR2, to 0
+	 *  set register 0x008, the ETMTEEVR, to 0x6F (TRUE).
+	 */
+	config->enable_ctrl1 = ETMTECR1_INC_EXC;
+	config->enable_ctrl2 = 0x0;
+	config->enable_event = ETM_HARD_WIRE_RES_A;
+
+	config->trigger_event = ETM_DEFAULT_EVENT_VAL;
+	config->enable_event = ETM_HARD_WIRE_RES_A;
+
+	config->seq_12_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_21_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_23_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_31_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_32_event = ETM_DEFAULT_EVENT_VAL;
+	config->seq_13_event = ETM_DEFAULT_EVENT_VAL;
+	config->timestamp_event = ETM_DEFAULT_EVENT_VAL;
+
+	for (i = 0; i < ETM_MAX_CNTR; i++) {
+		config->cntr_rld_val[i] = 0x0;
+		config->cntr_event[i] = ETM_DEFAULT_EVENT_VAL;
+		config->cntr_rld_event[i] = ETM_DEFAULT_EVENT_VAL;
+		config->cntr_val[i] = 0x0;
+	}
+
+	config->seq_curr_state = 0x0;
+	config->ctxid_idx = 0x0;
+	for (i = 0; i < ETM_MAX_CTXID_CMP; i++)
+		config->ctxid_pid[i] = 0x0;
+
+	config->ctxid_mask = 0x0;
+	/* Setting default to 1024 as per TRM recommendation */
+	config->sync_freq = 0x400;
+}
+
+void etm_config_trace_mode(struct etm_config *config)
+{
+	u32 flags, mode;
+
+	mode = config->mode;
+
+	mode &= (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER);
+
+	/* excluding kernel AND user space doesn't make sense */
+	if (mode == (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER))
+		return;
+
+	/* nothing to do if neither flags are set */
+	if (!(mode & ETM_MODE_EXCL_KERN) && !(mode & ETM_MODE_EXCL_USER))
+		return;
+
+	flags = (1 << 0 |	/* instruction execute */
+		 3 << 3 |	/* ARM instruction */
+		 0 << 5 |	/* No data value comparison */
+		 0 << 7 |	/* No exact mach */
+		 0 << 8);	/* Ignore context ID */
+
+	/* No need to worry about single address comparators. */
+	config->enable_ctrl2 = 0x0;
+
+	/* Bit 0 is address range comparator 1 */
+	config->enable_ctrl1 = ETMTECR1_ADDR_COMP_1;
+
+	/*
+	 * On ETMv3.5:
+	 * ETMACTRn[13,11] == Non-secure state comparison control
+	 * ETMACTRn[12,10] == Secure state comparison control
+	 *
+	 * b00 == Match in all modes in this state
+	 * b01 == Do not match in any more in this state
+	 * b10 == Match in all modes excepts user mode in this state
+	 * b11 == Match only in user mode in this state
+	 */
+
+	/* Tracing in secure mode is not supported at this time */
+	flags |= (0 << 12 | 1 << 10);
+
+	if (mode & ETM_MODE_EXCL_USER) {
+		/* exclude user, match all modes except user mode */
+		flags |= (1 << 13 | 0 << 11);
+	} else {
+		/* exclude kernel, match only in user mode */
+		flags |= (1 << 13 | 1 << 11);
+	}
+
+	/*
+	 * The ETMEEVR register is already set to "hard wire A".  As such
+	 * all there is to do is setup an address comparator that spans
+	 * the entire address range and configure the state and mode bits.
+	 */
+	config->addr_val[0] = (u32) 0x0;
+	config->addr_val[1] = (u32) ~0x0;
+	config->addr_acctype[0] = flags;
+	config->addr_acctype[1] = flags;
+	config->addr_type[0] = ETM_ADDR_TYPE_RANGE;
+	config->addr_type[1] = ETM_ADDR_TYPE_RANGE;
+}
+
+#define ETM3X_SUPPORTED_OPTIONS (ETMCR_CYC_ACC | \
+				 ETMCR_TIMESTAMP_EN | \
+				 ETMCR_RETURN_STACK)
+
+static int etm_parse_event_config(struct etm_drvdata *drvdata,
+				  struct perf_event *event)
+{
+	struct etm_config *config = &drvdata->config;
+	struct perf_event_attr *attr = &event->attr;
+
+	if (!attr)
+		return -EINVAL;
+
+	/* Clear configuration from previous run */
+	memset(config, 0, sizeof(struct etm_config));
+
+	if (attr->exclude_kernel)
+		config->mode = ETM_MODE_EXCL_KERN;
+
+	if (attr->exclude_user)
+		config->mode = ETM_MODE_EXCL_USER;
+
+	/* Always start from the default config */
+	etm_set_default(config);
+
+	/*
+	 * By default the tracers are configured to trace the whole address
+	 * range.  Narrow the field only if requested by user space.
+	 */
+	if (config->mode)
+		etm_config_trace_mode(config);
+
+	/*
+	 * At this time only cycle accurate, return stack  and timestamp
+	 * options are available.
+	 */
+	if (attr->config & ~ETM3X_SUPPORTED_OPTIONS)
+		return -EINVAL;
+
+	config->ctrl = attr->config;
+
+	/* Don't trace contextID when runs in non-root PID namespace */
+	if (!task_is_in_init_pid_ns(current))
+		config->ctrl &= ~ETMCR_CTXID_SIZE;
+
+	/*
+	 * Possible to have cores with PTM (supports ret stack) and ETM
+	 * (never has ret stack) on the same SoC. So if we have a request
+	 * for return stack that can't be honoured on this core then
+	 * clear the bit - trace will still continue normally
+	 */
+	if ((config->ctrl & ETMCR_RETURN_STACK) &&
+	    !(drvdata->etmccer & ETMCCER_RETSTACK))
+		config->ctrl &= ~ETMCR_RETURN_STACK;
+
+	return 0;
+}
+
+static int etm_enable_hw(struct etm_drvdata *drvdata)
+{
+	int i, rc;
+	u32 etmcr;
+	struct etm_config *config = &drvdata->config;
+	struct coresight_device *csdev = drvdata->csdev;
+
+	CS_UNLOCK(drvdata->base);
+
+	rc = coresight_claim_device_unlocked(csdev);
+	if (rc)
+		goto done;
+
+	/* Turn engine on */
+	etm_clr_pwrdwn(drvdata);
+	/* Apply power to trace registers */
+	etm_set_pwrup(drvdata);
+	/* Make sure all registers are accessible */
+	etm_os_unlock(drvdata);
+
+	etm_set_prog(drvdata);
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	/* Clear setting from a previous run if need be */
+	etmcr &= ~ETM3X_SUPPORTED_OPTIONS;
+	etmcr |= drvdata->port_size;
+	etmcr |= ETMCR_ETM_EN;
+	etm_writel(drvdata, config->ctrl | etmcr, ETMCR);
+	etm_writel(drvdata, config->trigger_event, ETMTRIGGER);
+	etm_writel(drvdata, config->startstop_ctrl, ETMTSSCR);
+	etm_writel(drvdata, config->enable_event, ETMTEEVR);
+	etm_writel(drvdata, config->enable_ctrl1, ETMTECR1);
+	etm_writel(drvdata, config->fifofull_level, ETMFFLR);
+	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
+		etm_writel(drvdata, config->addr_val[i], ETMACVRn(i));
+		etm_writel(drvdata, config->addr_acctype[i], ETMACTRn(i));
+	}
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		etm_writel(drvdata, config->cntr_rld_val[i], ETMCNTRLDVRn(i));
+		etm_writel(drvdata, config->cntr_event[i], ETMCNTENRn(i));
+		etm_writel(drvdata, config->cntr_rld_event[i],
+			   ETMCNTRLDEVRn(i));
+		etm_writel(drvdata, config->cntr_val[i], ETMCNTVRn(i));
+	}
+	etm_writel(drvdata, config->seq_12_event, ETMSQ12EVR);
+	etm_writel(drvdata, config->seq_21_event, ETMSQ21EVR);
+	etm_writel(drvdata, config->seq_23_event, ETMSQ23EVR);
+	etm_writel(drvdata, config->seq_31_event, ETMSQ31EVR);
+	etm_writel(drvdata, config->seq_32_event, ETMSQ32EVR);
+	etm_writel(drvdata, config->seq_13_event, ETMSQ13EVR);
+	etm_writel(drvdata, config->seq_curr_state, ETMSQR);
+	for (i = 0; i < drvdata->nr_ext_out; i++)
+		etm_writel(drvdata, ETM_DEFAULT_EVENT_VAL, ETMEXTOUTEVRn(i));
+	for (i = 0; i < drvdata->nr_ctxid_cmp; i++)
+		etm_writel(drvdata, config->ctxid_pid[i], ETMCIDCVRn(i));
+	etm_writel(drvdata, config->ctxid_mask, ETMCIDCMR);
+	etm_writel(drvdata, config->sync_freq, ETMSYNCFR);
+	/* No external input selected */
+	etm_writel(drvdata, 0x0, ETMEXTINSELR);
+	etm_writel(drvdata, config->timestamp_event, ETMTSEVR);
+	/* No auxiliary control selected */
+	etm_writel(drvdata, 0x0, ETMAUXCR);
+	etm_writel(drvdata, drvdata->traceid, ETMTRACEIDR);
+	/* No VMID comparator value selected */
+	etm_writel(drvdata, 0x0, ETMVMIDCVR);
+
+	etm_clr_prog(drvdata);
+
+done:
+	CS_LOCK(drvdata->base);
+
+	dev_dbg(&drvdata->csdev->dev, "cpu: %d enable smp call done: %d\n",
+		drvdata->cpu, rc);
+	return rc;
+}
+
+struct etm_enable_arg {
+	struct etm_drvdata *drvdata;
+	int rc;
+};
+
+static void etm_enable_hw_smp_call(void *info)
+{
+	struct etm_enable_arg *arg = info;
+
+	if (WARN_ON(!arg))
+		return;
+	arg->rc = etm_enable_hw(arg->drvdata);
+}
+
+static int etm_cpu_id(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return drvdata->cpu;
+}
+
+int etm_read_alloc_trace_id(struct etm_drvdata *drvdata)
+{
+	int trace_id;
+
+	/*
+	 * This will allocate a trace ID to the cpu,
+	 * or return the one currently allocated.
+	 *
+	 * trace id function has its own lock
+	 */
+	trace_id = coresight_trace_id_get_cpu_id(drvdata->cpu);
+	if (IS_VALID_CS_TRACE_ID(trace_id))
+		drvdata->traceid = (u8)trace_id;
+	else
+		dev_err(&drvdata->csdev->dev,
+			"Failed to allocate trace ID for %s on CPU%d\n",
+			dev_name(&drvdata->csdev->dev), drvdata->cpu);
+	return trace_id;
+}
+
+void etm_release_trace_id(struct etm_drvdata *drvdata)
+{
+	coresight_trace_id_put_cpu_id(drvdata->cpu);
+}
+
+static int etm_enable_perf(struct coresight_device *csdev,
+			   struct perf_event *event)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int trace_id;
+
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
+		return -EINVAL;
+
+	/* Configure the tracer based on the session's specifics */
+	etm_parse_event_config(drvdata, event);
+
+	/*
+	 * perf allocates cpu ids as part of _setup_aux() - device needs to use
+	 * the allocated ID. This reads the current version without allocation.
+	 *
+	 * This does not use the trace id lock to prevent lock_dep issues
+	 * with perf locks - we know the ID cannot change until perf shuts down
+	 * the session
+	 */
+	trace_id = coresight_trace_id_read_cpu_id(drvdata->cpu);
+	if (!IS_VALID_CS_TRACE_ID(trace_id)) {
+		dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n",
+			dev_name(&drvdata->csdev->dev), drvdata->cpu);
+		return -EINVAL;
+	}
+	drvdata->traceid = (u8)trace_id;
+
+	/* And enable it */
+	return etm_enable_hw(drvdata);
+}
+
+static int etm_enable_sysfs(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etm_enable_arg arg = { };
+	int ret;
+
+	spin_lock(&drvdata->spinlock);
+
+	/* sysfs needs to allocate and set a trace ID */
+	ret = etm_read_alloc_trace_id(drvdata);
+	if (ret < 0)
+		goto unlock_enable_sysfs;
+
+	/*
+	 * Configure the ETM only if the CPU is online.  If it isn't online
+	 * hw configuration will take place on the local CPU during bring up.
+	 */
+	if (cpu_online(drvdata->cpu)) {
+		arg.drvdata = drvdata;
+		ret = smp_call_function_single(drvdata->cpu,
+					       etm_enable_hw_smp_call, &arg, 1);
+		if (!ret)
+			ret = arg.rc;
+		if (!ret)
+			drvdata->sticky_enable = true;
+	} else {
+		ret = -ENODEV;
+	}
+
+	if (ret)
+		etm_release_trace_id(drvdata);
+
+unlock_enable_sysfs:
+	spin_unlock(&drvdata->spinlock);
+
+	if (!ret)
+		dev_dbg(&csdev->dev, "ETM tracing enabled\n");
+	return ret;
+}
+
+static int etm_enable(struct coresight_device *csdev, struct perf_event *event,
+		      enum cs_mode mode)
+{
+	int ret;
+	u32 val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode);
+
+	/* Someone is already using the tracer */
+	if (val)
+		return -EBUSY;
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = etm_enable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = etm_enable_perf(csdev, event);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	/* The tracer didn't start */
+	if (ret)
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+
+	return ret;
+}
+
+static void etm_disable_hw(void *info)
+{
+	int i;
+	struct etm_drvdata *drvdata = info;
+	struct etm_config *config = &drvdata->config;
+	struct coresight_device *csdev = drvdata->csdev;
+
+	CS_UNLOCK(drvdata->base);
+	etm_set_prog(drvdata);
+
+	/* Read back sequencer and counters for post trace analysis */
+	config->seq_curr_state = (etm_readl(drvdata, ETMSQR) & ETM_SQR_MASK);
+
+	for (i = 0; i < drvdata->nr_cntr; i++)
+		config->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i));
+
+	etm_set_pwrdwn(drvdata);
+	coresight_disclaim_device_unlocked(csdev);
+
+	CS_LOCK(drvdata->base);
+
+	dev_dbg(&drvdata->csdev->dev,
+		"cpu: %d disable smp call done\n", drvdata->cpu);
+}
+
+static void etm_disable_perf(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
+		return;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Setting the prog bit disables tracing immediately */
+	etm_set_prog(drvdata);
+
+	/*
+	 * There is no way to know when the tracer will be used again so
+	 * power down the tracer.
+	 */
+	etm_set_pwrdwn(drvdata);
+	coresight_disclaim_device_unlocked(csdev);
+
+	CS_LOCK(drvdata->base);
+
+	/*
+	 * perf will release trace ids when _free_aux()
+	 * is called at the end of the session
+	 */
+
+}
+
+static void etm_disable_sysfs(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * Taking hotplug lock here protects from clocks getting disabled
+	 * with tracing being left on (crash scenario) if user disable occurs
+	 * after cpu online mask indicates the cpu is offline but before the
+	 * DYING hotplug callback is serviced by the ETM driver.
+	 */
+	cpus_read_lock();
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * Executing etm_disable_hw on the cpu whose ETM is being disabled
+	 * ensures that register writes occur when cpu is powered.
+	 */
+	smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1);
+
+	spin_unlock(&drvdata->spinlock);
+	cpus_read_unlock();
+
+	/*
+	 * we only release trace IDs when resetting sysfs.
+	 * This permits sysfs users to read the trace ID after the trace
+	 * session has completed. This maintains operational behaviour with
+	 * prior trace id allocation method
+	 */
+
+	dev_dbg(&csdev->dev, "ETM tracing disabled\n");
+}
+
+static void etm_disable(struct coresight_device *csdev,
+			struct perf_event *event)
+{
+	enum cs_mode mode;
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * For as long as the tracer isn't disabled another entity can't
+	 * change its status.  As such we can read the status here without
+	 * fearing it will change under us.
+	 */
+	mode = local_read(&drvdata->mode);
+
+	switch (mode) {
+	case CS_MODE_DISABLED:
+		break;
+	case CS_MODE_SYSFS:
+		etm_disable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		etm_disable_perf(csdev);
+		break;
+	default:
+		WARN_ON_ONCE(mode);
+		return;
+	}
+
+	if (mode)
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+}
+
+static const struct coresight_ops_source etm_source_ops = {
+	.cpu_id		= etm_cpu_id,
+	.enable		= etm_enable,
+	.disable	= etm_disable,
+};
+
+static const struct coresight_ops etm_cs_ops = {
+	.source_ops	= &etm_source_ops,
+};
+
+static int etm_online_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	if (etmdrvdata[cpu]->boot_enable && !etmdrvdata[cpu]->sticky_enable)
+		coresight_enable(etmdrvdata[cpu]->csdev);
+	return 0;
+}
+
+static int etm_starting_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	spin_lock(&etmdrvdata[cpu]->spinlock);
+	if (!etmdrvdata[cpu]->os_unlock) {
+		etm_os_unlock(etmdrvdata[cpu]);
+		etmdrvdata[cpu]->os_unlock = true;
+	}
+
+	if (local_read(&etmdrvdata[cpu]->mode))
+		etm_enable_hw(etmdrvdata[cpu]);
+	spin_unlock(&etmdrvdata[cpu]->spinlock);
+	return 0;
+}
+
+static int etm_dying_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	spin_lock(&etmdrvdata[cpu]->spinlock);
+	if (local_read(&etmdrvdata[cpu]->mode))
+		etm_disable_hw(etmdrvdata[cpu]);
+	spin_unlock(&etmdrvdata[cpu]->spinlock);
+	return 0;
+}
+
+static bool etm_arch_supported(u8 arch)
+{
+	switch (arch) {
+	case ETM_ARCH_V3_3:
+		break;
+	case ETM_ARCH_V3_5:
+		break;
+	case PFT_ARCH_V1_0:
+		break;
+	case PFT_ARCH_V1_1:
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+static void etm_init_arch_data(void *info)
+{
+	u32 etmidr;
+	u32 etmccr;
+	struct etm_drvdata *drvdata = info;
+
+	/* Make sure all registers are accessible */
+	etm_os_unlock(drvdata);
+
+	CS_UNLOCK(drvdata->base);
+
+	/* First dummy read */
+	(void)etm_readl(drvdata, ETMPDSR);
+	/* Provide power to ETM: ETMPDCR[3] == 1 */
+	etm_set_pwrup(drvdata);
+	/*
+	 * Clear power down bit since when this bit is set writes to
+	 * certain registers might be ignored.
+	 */
+	etm_clr_pwrdwn(drvdata);
+	/*
+	 * Set prog bit. It will be set from reset but this is included to
+	 * ensure it is set
+	 */
+	etm_set_prog(drvdata);
+
+	/* Find all capabilities */
+	etmidr = etm_readl(drvdata, ETMIDR);
+	drvdata->arch = BMVAL(etmidr, 4, 11);
+	drvdata->port_size = etm_readl(drvdata, ETMCR) & PORT_SIZE_MASK;
+
+	drvdata->etmccer = etm_readl(drvdata, ETMCCER);
+	etmccr = etm_readl(drvdata, ETMCCR);
+	drvdata->etmccr = etmccr;
+	drvdata->nr_addr_cmp = BMVAL(etmccr, 0, 3) * 2;
+	drvdata->nr_cntr = BMVAL(etmccr, 13, 15);
+	drvdata->nr_ext_inp = BMVAL(etmccr, 17, 19);
+	drvdata->nr_ext_out = BMVAL(etmccr, 20, 22);
+	drvdata->nr_ctxid_cmp = BMVAL(etmccr, 24, 25);
+
+	etm_set_pwrdwn(drvdata);
+	etm_clr_pwrup(drvdata);
+	CS_LOCK(drvdata->base);
+}
+
+static int __init etm_hp_setup(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
+						   "arm/coresight:starting",
+						   etm_starting_cpu, etm_dying_cpu);
+
+	if (ret)
+		return ret;
+
+	ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+						   "arm/coresight:online",
+						   etm_online_cpu, NULL);
+
+	/* HP dyn state ID returned in ret on success */
+	if (ret > 0) {
+		hp_online = ret;
+		return 0;
+	}
+
+	/* failed dyn state - remove others */
+	cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
+
+	return ret;
+}
+
+static void etm_hp_clear(void)
+{
+	cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
+	if (hp_online) {
+		cpuhp_remove_state_nocalls(hp_online);
+		hp_online = 0;
+	}
+}
+
+static int etm_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct etm_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->use_cp14 = fwnode_property_read_bool(dev->fwnode, "arm,cp14");
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+
+	drvdata->cpu = coresight_get_cpu(dev);
+	if (drvdata->cpu < 0)
+		return drvdata->cpu;
+
+	desc.name  = devm_kasprintf(dev, GFP_KERNEL, "etm%d", drvdata->cpu);
+	if (!desc.name)
+		return -ENOMEM;
+
+	if (smp_call_function_single(drvdata->cpu,
+				     etm_init_arch_data,  drvdata, 1))
+		dev_err(dev, "ETM arch init failed\n");
+
+	if (etm_arch_supported(drvdata->arch) == false)
+		return -EINVAL;
+
+	etm_set_default(&drvdata->config);
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+
+	adev->dev.platform_data = pdata;
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+	desc.ops = &etm_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etm_groups;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	ret = etm_perf_symlink(drvdata->csdev, true);
+	if (ret) {
+		coresight_unregister(drvdata->csdev);
+		return ret;
+	}
+
+	etmdrvdata[drvdata->cpu] = drvdata;
+
+	pm_runtime_put(&adev->dev);
+	dev_info(&drvdata->csdev->dev,
+		 "%s initialized\n", (char *)coresight_get_uci_data(id));
+	if (boot_enable) {
+		coresight_enable(drvdata->csdev);
+		drvdata->boot_enable = true;
+	}
+
+	return 0;
+}
+
+static void clear_etmdrvdata(void *info)
+{
+	int cpu = *(int *)info;
+
+	etmdrvdata[cpu] = NULL;
+}
+
+static void etm_remove(struct amba_device *adev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	etm_perf_symlink(drvdata->csdev, false);
+
+	/*
+	 * Taking hotplug lock here to avoid racing between etm_remove and
+	 * CPU hotplug call backs.
+	 */
+	cpus_read_lock();
+	/*
+	 * The readers for etmdrvdata[] are CPU hotplug call backs
+	 * and PM notification call backs. Change etmdrvdata[i] on
+	 * CPU i ensures these call backs has consistent view
+	 * inside one call back function.
+	 */
+	if (smp_call_function_single(drvdata->cpu, clear_etmdrvdata, &drvdata->cpu, 1))
+		etmdrvdata[drvdata->cpu] = NULL;
+
+	cpus_read_unlock();
+
+	coresight_unregister(drvdata->csdev);
+}
+
+#ifdef CONFIG_PM
+static int etm_runtime_suspend(struct device *dev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int etm_runtime_resume(struct device *dev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops etm_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(etm_runtime_suspend, etm_runtime_resume, NULL)
+};
+
+static const struct amba_id etm_ids[] = {
+	/* ETM 3.3 */
+	CS_AMBA_ID_DATA(0x000bb921, "ETM 3.3"),
+	/* ETM 3.5 - Cortex-A5 */
+	CS_AMBA_ID_DATA(0x000bb955, "ETM 3.5"),
+	/* ETM 3.5 */
+	CS_AMBA_ID_DATA(0x000bb956, "ETM 3.5"),
+	/* PTM 1.0 */
+	CS_AMBA_ID_DATA(0x000bb950, "PTM 1.0"),
+	/* PTM 1.1 */
+	CS_AMBA_ID_DATA(0x000bb95f, "PTM 1.1"),
+	/* PTM 1.1 Qualcomm */
+	CS_AMBA_ID_DATA(0x000b006f, "PTM 1.1"),
+	{ 0, 0},
+};
+
+MODULE_DEVICE_TABLE(amba, etm_ids);
+
+static struct amba_driver etm_driver = {
+	.drv = {
+		.name	= "coresight-etm3x",
+		.owner	= THIS_MODULE,
+		.pm	= &etm_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= etm_probe,
+	.remove         = etm_remove,
+	.id_table	= etm_ids,
+};
+
+static int __init etm_init(void)
+{
+	int ret;
+
+	ret = etm_hp_setup();
+
+	/* etm_hp_setup() does its own cleanup - exit on error */
+	if (ret)
+		return ret;
+
+	ret = amba_driver_register(&etm_driver);
+	if (ret) {
+		pr_err("Error registering etm3x driver\n");
+		etm_hp_clear();
+	}
+
+	return ret;
+}
+
+static void __exit etm_exit(void)
+{
+	amba_driver_unregister(&etm_driver);
+	etm_hp_clear();
+}
+
+module_init(etm_init);
+module_exit(etm_exit);
+
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_AUTHOR("Mathieu Poirier <mathieu.poirier@linaro.org>");
+MODULE_DESCRIPTION("Arm CoreSight Program Flow Trace driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
new file mode 100644
index 0000000000..2f271b7fb0
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
@@ -0,0 +1,1269 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/pid_namespace.h>
+#include <linux/pm_runtime.h>
+#include <linux/sysfs.h>
+#include "coresight-etm.h"
+#include "coresight-priv.h"
+
+static ssize_t nr_addr_cmp_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_addr_cmp;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_addr_cmp);
+
+static ssize_t nr_cntr_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_cntr;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_cntr);
+
+static ssize_t nr_ctxid_cmp_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_ctxid_cmp;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_ctxid_cmp);
+
+static ssize_t etmsr_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	unsigned long flags, val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	pm_runtime_get_sync(dev->parent);
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	CS_UNLOCK(drvdata->base);
+
+	val = etm_readl(drvdata, ETMSR);
+
+	CS_LOCK(drvdata->base);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	pm_runtime_put(dev->parent);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(etmsr);
+
+static ssize_t reset_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	int i, ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		spin_lock(&drvdata->spinlock);
+		memset(config, 0, sizeof(struct etm_config));
+		config->mode = ETM_MODE_EXCLUDE;
+		config->trigger_event = ETM_DEFAULT_EVENT_VAL;
+		for (i = 0; i < drvdata->nr_addr_cmp; i++) {
+			config->addr_type[i] = ETM_ADDR_TYPE_NONE;
+		}
+
+		etm_set_default(config);
+		etm_release_trace_id(drvdata);
+		spin_unlock(&drvdata->spinlock);
+	}
+
+	return size;
+}
+static DEVICE_ATTR_WO(reset);
+
+static ssize_t mode_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->mode;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t mode_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->mode = val & ETM_MODE_ALL;
+
+	if (config->mode & ETM_MODE_EXCLUDE)
+		config->enable_ctrl1 |= ETMTECR1_INC_EXC;
+	else
+		config->enable_ctrl1 &= ~ETMTECR1_INC_EXC;
+
+	if (config->mode & ETM_MODE_CYCACC)
+		config->ctrl |= ETMCR_CYC_ACC;
+	else
+		config->ctrl &= ~ETMCR_CYC_ACC;
+
+	if (config->mode & ETM_MODE_STALL) {
+		if (!(drvdata->etmccr & ETMCCR_FIFOFULL)) {
+			dev_warn(dev, "stall mode not supported\n");
+			ret = -EINVAL;
+			goto err_unlock;
+		}
+		config->ctrl |= ETMCR_STALL_MODE;
+	} else
+		config->ctrl &= ~ETMCR_STALL_MODE;
+
+	if (config->mode & ETM_MODE_TIMESTAMP) {
+		if (!(drvdata->etmccer & ETMCCER_TIMESTAMP)) {
+			dev_warn(dev, "timestamp not supported\n");
+			ret = -EINVAL;
+			goto err_unlock;
+		}
+		config->ctrl |= ETMCR_TIMESTAMP_EN;
+	} else
+		config->ctrl &= ~ETMCR_TIMESTAMP_EN;
+
+	if (config->mode & ETM_MODE_CTXID)
+		config->ctrl |= ETMCR_CTXID_SIZE;
+	else
+		config->ctrl &= ~ETMCR_CTXID_SIZE;
+
+	if (config->mode & ETM_MODE_BBROAD)
+		config->ctrl |= ETMCR_BRANCH_BROADCAST;
+	else
+		config->ctrl &= ~ETMCR_BRANCH_BROADCAST;
+
+	if (config->mode & ETM_MODE_RET_STACK)
+		config->ctrl |= ETMCR_RETURN_STACK;
+	else
+		config->ctrl &= ~ETMCR_RETURN_STACK;
+
+	if (config->mode & (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER))
+		etm_config_trace_mode(config);
+
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+
+err_unlock:
+	spin_unlock(&drvdata->spinlock);
+	return ret;
+}
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t trigger_event_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->trigger_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_event_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->trigger_event = val & ETM_EVENT_MASK;
+
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_event);
+
+static ssize_t enable_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->enable_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t enable_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->enable_event = val & ETM_EVENT_MASK;
+
+	return size;
+}
+static DEVICE_ATTR_RW(enable_event);
+
+static ssize_t fifofull_level_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->fifofull_level;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t fifofull_level_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->fifofull_level = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(fifofull_level);
+
+static ssize_t addr_idx_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->addr_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_addr_cmp)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->addr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_idx);
+
+static ssize_t addr_single_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+
+	val = config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_single_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+
+	config->addr_val[idx] = val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_SINGLE;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_single);
+
+static ssize_t addr_range_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val1 = config->addr_val[idx];
+	val2 = config->addr_val[idx + 1];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t addr_range_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	if (sscanf(buf, "%lx %lx", &val1, &val2) != 2)
+		return -EINVAL;
+	/* Lower address comparator cannot have a higher address value */
+	if (val1 > val2)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = val1;
+	config->addr_type[idx] = ETM_ADDR_TYPE_RANGE;
+	config->addr_val[idx + 1] = val2;
+	config->addr_type[idx + 1] = ETM_ADDR_TYPE_RANGE;
+	config->enable_ctrl1 |= (1 << (idx/2));
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_range);
+
+static ssize_t addr_start_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_start_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_START;
+	config->startstop_ctrl |= (1 << idx);
+	config->enable_ctrl1 |= ETMTECR1_START_STOP;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_start);
+
+static ssize_t addr_stop_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_stop_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_STOP;
+	config->startstop_ctrl |= (1 << (idx + 16));
+	config->enable_ctrl1 |= ETMTECR1_START_STOP;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_stop);
+
+static ssize_t addr_acctype_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->addr_acctype[config->addr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_acctype_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->addr_acctype[config->addr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_acctype);
+
+static ssize_t cntr_idx_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->cntr_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_cntr)
+		return -EINVAL;
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->cntr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_idx);
+
+static ssize_t cntr_rld_val_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->cntr_rld_val[config->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_rld_val_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->cntr_rld_val[config->cntr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_rld_val);
+
+static ssize_t cntr_event_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->cntr_event[config->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_event_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->cntr_event[config->cntr_idx] = val & ETM_EVENT_MASK;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_event);
+
+static ssize_t cntr_rld_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->cntr_rld_event[config->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_rld_event_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->cntr_rld_event[config->cntr_idx] = val & ETM_EVENT_MASK;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_rld_event);
+
+static ssize_t cntr_val_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	int i, ret = 0;
+	u32 val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	if (!local_read(&drvdata->mode)) {
+		spin_lock(&drvdata->spinlock);
+		for (i = 0; i < drvdata->nr_cntr; i++)
+			ret += sprintf(buf, "counter %d: %x\n",
+				       i, config->cntr_val[i]);
+		spin_unlock(&drvdata->spinlock);
+		return ret;
+	}
+
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		val = etm_readl(drvdata, ETMCNTVRn(i));
+		ret += sprintf(buf, "counter %d: %x\n", i, val);
+	}
+
+	return ret;
+}
+
+static ssize_t cntr_val_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->cntr_val[config->cntr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_val);
+
+static ssize_t seq_12_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_12_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_12_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_12_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_12_event);
+
+static ssize_t seq_21_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_21_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_21_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_21_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_21_event);
+
+static ssize_t seq_23_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_23_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_23_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_23_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_23_event);
+
+static ssize_t seq_31_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_31_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_31_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_31_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_31_event);
+
+static ssize_t seq_32_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_32_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_32_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_32_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_32_event);
+
+static ssize_t seq_13_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->seq_13_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_13_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->seq_13_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_13_event);
+
+static ssize_t seq_curr_state_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	unsigned long val, flags;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	if (!local_read(&drvdata->mode)) {
+		val = config->seq_curr_state;
+		goto out;
+	}
+
+	pm_runtime_get_sync(dev->parent);
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	CS_UNLOCK(drvdata->base);
+	val = (etm_readl(drvdata, ETMSQR) & ETM_SQR_MASK);
+	CS_LOCK(drvdata->base);
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	pm_runtime_put(dev->parent);
+out:
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_curr_state_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val > ETM_SEQ_STATE_MAX_VAL)
+		return -EINVAL;
+
+	config->seq_curr_state = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(seq_curr_state);
+
+static ssize_t ctxid_idx_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->ctxid_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_idx_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_ctxid_cmp)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->ctxid_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_idx);
+
+static ssize_t ctxid_pid_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->ctxid_pid[config->ctxid_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_pid_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	int ret;
+	unsigned long pid;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	/*
+	 * When contextID tracing is enabled the tracers will insert the
+	 * value found in the contextID register in the trace stream.  But if
+	 * a process is in a namespace the PID of that process as seen from the
+	 * namespace won't be what the kernel sees, something that makes the
+	 * feature confusing and can potentially leak kernel only information.
+	 * As such refuse to use the feature if @current is not in the initial
+	 * PID namespace.
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &pid);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	config->ctxid_pid[config->ctxid_idx] = pid;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_pid);
+
+static ssize_t ctxid_mask_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	val = config->ctxid_mask;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_mask_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->ctxid_mask = val;
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_mask);
+
+static ssize_t sync_freq_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->sync_freq;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t sync_freq_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->sync_freq = val & ETM_SYNC_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(sync_freq);
+
+static ssize_t timestamp_event_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	val = config->timestamp_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t timestamp_event_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etm_config *config = &drvdata->config;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	config->timestamp_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(timestamp_event);
+
+static ssize_t cpu_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	int val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->cpu;
+	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+
+}
+static DEVICE_ATTR_RO(cpu);
+
+static ssize_t traceid_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	int trace_id;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	trace_id = etm_read_alloc_trace_id(drvdata);
+	if (trace_id < 0)
+		return trace_id;
+
+	return sysfs_emit(buf, "%#x\n", trace_id);
+}
+static DEVICE_ATTR_RO(traceid);
+
+static struct attribute *coresight_etm_attrs[] = {
+	&dev_attr_nr_addr_cmp.attr,
+	&dev_attr_nr_cntr.attr,
+	&dev_attr_nr_ctxid_cmp.attr,
+	&dev_attr_etmsr.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_trigger_event.attr,
+	&dev_attr_enable_event.attr,
+	&dev_attr_fifofull_level.attr,
+	&dev_attr_addr_idx.attr,
+	&dev_attr_addr_single.attr,
+	&dev_attr_addr_range.attr,
+	&dev_attr_addr_start.attr,
+	&dev_attr_addr_stop.attr,
+	&dev_attr_addr_acctype.attr,
+	&dev_attr_cntr_idx.attr,
+	&dev_attr_cntr_rld_val.attr,
+	&dev_attr_cntr_event.attr,
+	&dev_attr_cntr_rld_event.attr,
+	&dev_attr_cntr_val.attr,
+	&dev_attr_seq_12_event.attr,
+	&dev_attr_seq_21_event.attr,
+	&dev_attr_seq_23_event.attr,
+	&dev_attr_seq_31_event.attr,
+	&dev_attr_seq_32_event.attr,
+	&dev_attr_seq_13_event.attr,
+	&dev_attr_seq_curr_state.attr,
+	&dev_attr_ctxid_idx.attr,
+	&dev_attr_ctxid_pid.attr,
+	&dev_attr_ctxid_mask.attr,
+	&dev_attr_sync_freq.attr,
+	&dev_attr_timestamp_event.attr,
+	&dev_attr_traceid.attr,
+	&dev_attr_cpu.attr,
+	NULL,
+};
+
+static struct attribute *coresight_etm_mgmt_attrs[] = {
+	coresight_simple_reg32(etmccr, ETMCCR),
+	coresight_simple_reg32(etmccer, ETMCCER),
+	coresight_simple_reg32(etmscr, ETMSCR),
+	coresight_simple_reg32(etmidr, ETMIDR),
+	coresight_simple_reg32(etmcr, ETMCR),
+	coresight_simple_reg32(etmtraceidr, ETMTRACEIDR),
+	coresight_simple_reg32(etmteevr, ETMTEEVR),
+	coresight_simple_reg32(etmtssvr, ETMTSSCR),
+	coresight_simple_reg32(etmtecr1, ETMTECR1),
+	coresight_simple_reg32(etmtecr2, ETMTECR2),
+	NULL,
+};
+
+static const struct attribute_group coresight_etm_group = {
+	.attrs = coresight_etm_attrs,
+};
+
+static const struct attribute_group coresight_etm_mgmt_group = {
+	.attrs = coresight_etm_mgmt_attrs,
+	.name = "mgmt",
+};
+
+const struct attribute_group *coresight_etm_groups[] = {
+	&coresight_etm_group,
+	&coresight_etm_mgmt_group,
+	NULL,
+};
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-cfg.c b/drivers/hwtracing/coresight/coresight-etm4x-cfg.c
new file mode 100644
index 0000000000..c302072b29
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x-cfg.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2020 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include "coresight-etm4x.h"
+#include "coresight-etm4x-cfg.h"
+#include "coresight-priv.h"
+#include "coresight-syscfg.h"
+
+/* defines to associate register IDs with driver data locations */
+#define CHECKREG(cval, elem) \
+	{ \
+		if (offset == cval) { \
+			reg_csdev->driver_regval = &drvcfg->elem; \
+			err = 0; \
+			break; \
+		} \
+	}
+
+#define CHECKREGIDX(cval, elem, off_idx, mask)	\
+	{ \
+		if (mask == cval) { \
+			reg_csdev->driver_regval = &drvcfg->elem[off_idx]; \
+			err = 0; \
+			break; \
+		} \
+	}
+
+/**
+ * etm4_cfg_map_reg_offset - validate and map the register offset into a
+ *			     location in the driver config struct.
+ *
+ * Limits the number of registers that can be accessed and programmed in
+ * features, to those which are used to control the trace capture parameters.
+ *
+ * Omits or limits access to those which the driver must use exclusively.
+ *
+ * Invalid offsets will result in fail code return and feature load failure.
+ *
+ * @drvdata:	driver data to map into.
+ * @reg_csdev:	register to map.
+ * @offset:	device offset for the register
+ */
+static int etm4_cfg_map_reg_offset(struct etmv4_drvdata *drvdata,
+				   struct cscfg_regval_csdev *reg_csdev, u32 offset)
+{
+	int err = -EINVAL, idx;
+	struct etmv4_config *drvcfg = &drvdata->config;
+	u32 off_mask;
+
+	if (((offset >= TRCEVENTCTL0R) && (offset <= TRCVIPCSSCTLR)) ||
+	    ((offset >= TRCSEQRSTEVR) && (offset <= TRCEXTINSELR)) ||
+	    ((offset >= TRCCIDCCTLR0) && (offset <= TRCVMIDCCTLR1))) {
+		do {
+			CHECKREG(TRCEVENTCTL0R, eventctrl0);
+			CHECKREG(TRCEVENTCTL1R, eventctrl1);
+			CHECKREG(TRCSTALLCTLR, stall_ctrl);
+			CHECKREG(TRCTSCTLR, ts_ctrl);
+			CHECKREG(TRCSYNCPR, syncfreq);
+			CHECKREG(TRCCCCTLR, ccctlr);
+			CHECKREG(TRCBBCTLR, bb_ctrl);
+			CHECKREG(TRCVICTLR, vinst_ctrl);
+			CHECKREG(TRCVIIECTLR, viiectlr);
+			CHECKREG(TRCVISSCTLR, vissctlr);
+			CHECKREG(TRCVIPCSSCTLR, vipcssctlr);
+			CHECKREG(TRCSEQRSTEVR, seq_rst);
+			CHECKREG(TRCSEQSTR, seq_state);
+			CHECKREG(TRCEXTINSELR, ext_inp);
+			CHECKREG(TRCCIDCCTLR0, ctxid_mask0);
+			CHECKREG(TRCCIDCCTLR1, ctxid_mask1);
+			CHECKREG(TRCVMIDCCTLR0, vmid_mask0);
+			CHECKREG(TRCVMIDCCTLR1, vmid_mask1);
+		} while (0);
+	} else if ((offset & GENMASK(11, 4)) == TRCSEQEVRn(0)) {
+		/* sequencer state control registers */
+		idx = (offset & GENMASK(3, 0)) / 4;
+		if (idx < ETM_MAX_SEQ_STATES) {
+			reg_csdev->driver_regval = &drvcfg->seq_ctrl[idx];
+			err = 0;
+		}
+	} else if ((offset >= TRCSSCCRn(0)) && (offset <= TRCSSPCICRn(7))) {
+		/* 32 bit, 8 off indexed register sets */
+		idx = (offset & GENMASK(4, 0)) / 4;
+		off_mask =  (offset & GENMASK(11, 5));
+		do {
+			CHECKREGIDX(TRCSSCCRn(0), ss_ctrl, idx, off_mask);
+			CHECKREGIDX(TRCSSCSRn(0), ss_status, idx, off_mask);
+			CHECKREGIDX(TRCSSPCICRn(0), ss_pe_cmp, idx, off_mask);
+		} while (0);
+	} else if ((offset >= TRCCIDCVRn(0)) && (offset <= TRCVMIDCVRn(7))) {
+		/* 64 bit, 8 off indexed register sets */
+		idx = (offset & GENMASK(5, 0)) / 8;
+		off_mask = (offset & GENMASK(11, 6));
+		do {
+			CHECKREGIDX(TRCCIDCVRn(0), ctxid_pid, idx, off_mask);
+			CHECKREGIDX(TRCVMIDCVRn(0), vmid_val, idx, off_mask);
+		} while (0);
+	} else if ((offset >= TRCRSCTLRn(2)) &&
+		   (offset <= TRCRSCTLRn((ETM_MAX_RES_SEL - 1)))) {
+		/* 32 bit resource selection regs, 32 off, skip fixed 0,1 */
+		idx = (offset & GENMASK(6, 0)) / 4;
+		if (idx < ETM_MAX_RES_SEL) {
+			reg_csdev->driver_regval = &drvcfg->res_ctrl[idx];
+			err = 0;
+		}
+	} else if ((offset >= TRCACVRn(0)) &&
+		   (offset <= TRCACATRn((ETM_MAX_SINGLE_ADDR_CMP - 1)))) {
+		/* 64 bit addr cmp regs, 16 off */
+		idx = (offset & GENMASK(6, 0)) / 8;
+		off_mask = offset & GENMASK(11, 7);
+		do {
+			CHECKREGIDX(TRCACVRn(0), addr_val, idx, off_mask);
+			CHECKREGIDX(TRCACATRn(0), addr_acc, idx, off_mask);
+		} while (0);
+	} else if ((offset >= TRCCNTRLDVRn(0)) &&
+		   (offset <= TRCCNTVRn((ETMv4_MAX_CNTR - 1)))) {
+		/* 32 bit counter regs, 4 off (ETMv4_MAX_CNTR - 1) */
+		idx = (offset &  GENMASK(3, 0)) / 4;
+		off_mask = offset &  GENMASK(11, 4);
+		do {
+			CHECKREGIDX(TRCCNTRLDVRn(0), cntrldvr, idx, off_mask);
+			CHECKREGIDX(TRCCNTCTLRn(0), cntr_ctrl, idx, off_mask);
+			CHECKREGIDX(TRCCNTVRn(0), cntr_val, idx, off_mask);
+		} while (0);
+	}
+	return err;
+}
+
+/**
+ * etm4_cfg_load_feature - load a feature into a device instance.
+ *
+ * @csdev:	An ETMv4 CoreSight device.
+ * @feat_csdev:	The feature to be loaded.
+ *
+ * The function will load a feature instance into the device, checking that
+ * the register definitions are valid for the device.
+ *
+ * Parameter and register definitions will be converted into internal
+ * structures that are used to set the values in the driver when the
+ * feature is enabled for the device.
+ *
+ * The feature spinlock pointer is initialised to the same spinlock
+ * that the driver uses to protect the internal register values.
+ */
+static int etm4_cfg_load_feature(struct coresight_device *csdev,
+				 struct cscfg_feature_csdev *feat_csdev)
+{
+	struct device *dev = csdev->dev.parent;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
+	const struct cscfg_feature_desc *feat_desc = feat_csdev->feat_desc;
+	u32 offset;
+	int i = 0, err = 0;
+
+	/*
+	 * essential we set the device spinlock - this is used in the generic
+	 * programming routines when copying values into the drvdata structures
+	 * via the pointers setup in etm4_cfg_map_reg_offset().
+	 */
+	feat_csdev->drv_spinlock = &drvdata->spinlock;
+
+	/* process the register descriptions */
+	for (i = 0; i < feat_csdev->nr_regs && !err; i++) {
+		offset = feat_desc->regs_desc[i].offset;
+		err = etm4_cfg_map_reg_offset(drvdata, &feat_csdev->regs_csdev[i], offset);
+	}
+	return err;
+}
+
+/* match information when loading configurations */
+#define CS_CFG_ETM4_MATCH_FLAGS	(CS_CFG_MATCH_CLASS_SRC_ALL | \
+				 CS_CFG_MATCH_CLASS_SRC_ETM4)
+
+int etm4_cscfg_register(struct coresight_device *csdev)
+{
+	struct cscfg_csdev_feat_ops ops;
+
+	ops.load_feat = &etm4_cfg_load_feature;
+
+	return cscfg_register_csdev(csdev, CS_CFG_ETM4_MATCH_FLAGS, &ops);
+}
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-cfg.h b/drivers/hwtracing/coresight/coresight-etm4x-cfg.h
new file mode 100644
index 0000000000..32dab34c1d
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x-cfg.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2014-2020, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_ETM4X_CFG_H
+#define _CORESIGHT_ETM4X_CFG_H
+
+#include "coresight-config.h"
+#include "coresight-etm4x.h"
+
+/* ETMv4 specific config defines */
+
+/* resource IDs */
+
+#define ETM4_CFG_RES_CTR	0x001
+#define ETM4_CFG_RES_CMP	0x002
+#define ETM4_CFG_RES_CMP_PAIR0	0x003
+#define ETM4_CFG_RES_CMP_PAIR1	0x004
+#define ETM4_CFG_RES_SEL	0x005
+#define ETM4_CFG_RES_SEL_PAIR0	0x006
+#define ETM4_CFG_RES_SEL_PAIR1	0x007
+#define ETM4_CFG_RES_SEQ	0x008
+#define ETM4_CFG_RES_TS		0x009
+#define ETM4_CFG_RES_MASK	0x00F
+
+/* ETMv4 specific config functions */
+int etm4_cscfg_register(struct coresight_device *csdev);
+
+#endif /* CORESIGHT_ETM4X_CFG_H */
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
new file mode 100644
index 0000000000..34aee59dd1
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -0,0 +1,2412 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/stat.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/cpu_pm.h>
+#include <linux/coresight.h>
+#include <linux/coresight-pmu.h>
+#include <linux/pm_wakeup.h>
+#include <linux/amba/bus.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/clk/clk-conf.h>
+
+#include <asm/barrier.h>
+#include <asm/sections.h>
+#include <asm/sysreg.h>
+#include <asm/local.h>
+#include <asm/virt.h>
+
+#include "coresight-etm4x.h"
+#include "coresight-etm-perf.h"
+#include "coresight-etm4x-cfg.h"
+#include "coresight-self-hosted-trace.h"
+#include "coresight-syscfg.h"
+#include "coresight-trace-id.h"
+
+static int boot_enable;
+module_param(boot_enable, int, 0444);
+MODULE_PARM_DESC(boot_enable, "Enable tracing on boot");
+
+#define PARAM_PM_SAVE_FIRMWARE	  0 /* save self-hosted state as per firmware */
+#define PARAM_PM_SAVE_NEVER	  1 /* never save any state */
+#define PARAM_PM_SAVE_SELF_HOSTED 2 /* save self-hosted state only */
+
+static int pm_save_enable = PARAM_PM_SAVE_FIRMWARE;
+module_param(pm_save_enable, int, 0444);
+MODULE_PARM_DESC(pm_save_enable,
+	"Save/restore state on power down: 1 = never, 2 = self-hosted");
+
+static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
+static void etm4_set_default_config(struct etmv4_config *config);
+static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
+				  struct perf_event *event);
+static u64 etm4_get_access_type(struct etmv4_config *config);
+
+static enum cpuhp_state hp_online;
+
+struct etm4_init_arg {
+	struct device		*dev;
+	struct csdev_access	*csa;
+};
+
+static DEFINE_PER_CPU(struct etm4_init_arg *, delayed_probe);
+static int etm4_probe_cpu(unsigned int cpu);
+
+/*
+ * Check if TRCSSPCICRn(i) is implemented for a given instance.
+ *
+ * TRCSSPCICRn is implemented only if :
+ *	TRCSSPCICR<n> is present only if all of the following are true:
+ *		TRCIDR4.NUMSSCC > n.
+ *		TRCIDR4.NUMPC > 0b0000 .
+ *		TRCSSCSR<n>.PC == 0b1
+ */
+static inline bool etm4x_sspcicrn_present(struct etmv4_drvdata *drvdata, int n)
+{
+	return (n < drvdata->nr_ss_cmp) &&
+	       drvdata->nr_pe &&
+	       (drvdata->config.ss_status[n] & TRCSSCSRn_PC);
+}
+
+u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit)
+{
+	u64 res = 0;
+
+	switch (offset) {
+	ETM4x_READ_SYSREG_CASES(res)
+	default :
+		pr_warn_ratelimited("etm4x: trying to read unsupported register @%x\n",
+			 offset);
+	}
+
+	if (!_relaxed)
+		__io_ar(res);	/* Imitate the !relaxed I/O helpers */
+
+	return res;
+}
+
+void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit)
+{
+	if (!_relaxed)
+		__io_bw();	/* Imitate the !relaxed I/O helpers */
+	if (!_64bit)
+		val &= GENMASK(31, 0);
+
+	switch (offset) {
+	ETM4x_WRITE_SYSREG_CASES(val)
+	default :
+		pr_warn_ratelimited("etm4x: trying to write to unsupported register @%x\n",
+			offset);
+	}
+}
+
+static u64 ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit)
+{
+	u64 res = 0;
+
+	switch (offset) {
+	ETE_READ_CASES(res)
+	default :
+		pr_warn_ratelimited("ete: trying to read unsupported register @%x\n",
+				    offset);
+	}
+
+	if (!_relaxed)
+		__io_ar(res);	/* Imitate the !relaxed I/O helpers */
+
+	return res;
+}
+
+static void ete_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit)
+{
+	if (!_relaxed)
+		__io_bw();	/* Imitate the !relaxed I/O helpers */
+	if (!_64bit)
+		val &= GENMASK(31, 0);
+
+	switch (offset) {
+	ETE_WRITE_CASES(val)
+	default :
+		pr_warn_ratelimited("ete: trying to write to unsupported register @%x\n",
+				    offset);
+	}
+}
+
+static void etm_detect_os_lock(struct etmv4_drvdata *drvdata,
+			       struct csdev_access *csa)
+{
+	u32 oslsr = etm4x_relaxed_read32(csa, TRCOSLSR);
+
+	drvdata->os_lock_model = ETM_OSLSR_OSLM(oslsr);
+}
+
+static void etm_write_os_lock(struct etmv4_drvdata *drvdata,
+			      struct csdev_access *csa, u32 val)
+{
+	val = !!val;
+
+	switch (drvdata->os_lock_model) {
+	case ETM_OSLOCK_PRESENT:
+		etm4x_relaxed_write32(csa, val, TRCOSLAR);
+		break;
+	case ETM_OSLOCK_PE:
+		write_sysreg_s(val, SYS_OSLAR_EL1);
+		break;
+	default:
+		pr_warn_once("CPU%d: Unsupported Trace OSLock model: %x\n",
+			     smp_processor_id(), drvdata->os_lock_model);
+		fallthrough;
+	case ETM_OSLOCK_NI:
+		return;
+	}
+	isb();
+}
+
+static inline void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata,
+				      struct csdev_access *csa)
+{
+	WARN_ON(drvdata->cpu != smp_processor_id());
+
+	/* Writing 0 to OS Lock unlocks the trace unit registers */
+	etm_write_os_lock(drvdata, csa, 0x0);
+	drvdata->os_unlock = true;
+}
+
+static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
+{
+	if (!WARN_ON(!drvdata->csdev))
+		etm4_os_unlock_csa(drvdata, &drvdata->csdev->access);
+}
+
+static void etm4_os_lock(struct etmv4_drvdata *drvdata)
+{
+	if (WARN_ON(!drvdata->csdev))
+		return;
+	/* Writing 0x1 to OS Lock locks the trace registers */
+	etm_write_os_lock(drvdata, &drvdata->csdev->access, 0x1);
+	drvdata->os_unlock = false;
+}
+
+static void etm4_cs_lock(struct etmv4_drvdata *drvdata,
+			 struct csdev_access *csa)
+{
+	/* Software Lock is only accessible via memory mapped interface */
+	if (csa->io_mem)
+		CS_LOCK(csa->base);
+}
+
+static void etm4_cs_unlock(struct etmv4_drvdata *drvdata,
+			   struct csdev_access *csa)
+{
+	if (csa->io_mem)
+		CS_UNLOCK(csa->base);
+}
+
+static int etm4_cpu_id(struct coresight_device *csdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return drvdata->cpu;
+}
+
+int etm4_read_alloc_trace_id(struct etmv4_drvdata *drvdata)
+{
+	int trace_id;
+
+	/*
+	 * This will allocate a trace ID to the cpu,
+	 * or return the one currently allocated.
+	 * The trace id function has its own lock
+	 */
+	trace_id = coresight_trace_id_get_cpu_id(drvdata->cpu);
+	if (IS_VALID_CS_TRACE_ID(trace_id))
+		drvdata->trcid = (u8)trace_id;
+	else
+		dev_err(&drvdata->csdev->dev,
+			"Failed to allocate trace ID for %s on CPU%d\n",
+			dev_name(&drvdata->csdev->dev), drvdata->cpu);
+	return trace_id;
+}
+
+void etm4_release_trace_id(struct etmv4_drvdata *drvdata)
+{
+	coresight_trace_id_put_cpu_id(drvdata->cpu);
+}
+
+struct etm4_enable_arg {
+	struct etmv4_drvdata *drvdata;
+	int rc;
+};
+
+/*
+ * etm4x_prohibit_trace - Prohibit the CPU from tracing at all ELs.
+ * When the CPU supports FEAT_TRF, we could move the ETM to a trace
+ * prohibited state by filtering the Exception levels via TRFCR_EL1.
+ */
+static void etm4x_prohibit_trace(struct etmv4_drvdata *drvdata)
+{
+	/* If the CPU doesn't support FEAT_TRF, nothing to do */
+	if (!drvdata->trfcr)
+		return;
+	cpu_prohibit_trace();
+}
+
+/*
+ * etm4x_allow_trace - Allow CPU tracing in the respective ELs,
+ * as configured by the drvdata->config.mode for the current
+ * session. Even though we have TRCVICTLR bits to filter the
+ * trace in the ELs, it doesn't prevent the ETM from generating
+ * a packet (e.g, TraceInfo) that might contain the addresses from
+ * the excluded levels. Thus we use the additional controls provided
+ * via the Trace Filtering controls (FEAT_TRF) to make sure no trace
+ * is generated for the excluded ELs.
+ */
+static void etm4x_allow_trace(struct etmv4_drvdata *drvdata)
+{
+	u64 trfcr = drvdata->trfcr;
+
+	/* If the CPU doesn't support FEAT_TRF, nothing to do */
+	if (!trfcr)
+		return;
+
+	if (drvdata->config.mode & ETM_MODE_EXCL_KERN)
+		trfcr &= ~TRFCR_ELx_ExTRE;
+	if (drvdata->config.mode & ETM_MODE_EXCL_USER)
+		trfcr &= ~TRFCR_ELx_E0TRE;
+
+	write_trfcr(trfcr);
+}
+
+#ifdef CONFIG_ETM4X_IMPDEF_FEATURE
+
+#define HISI_HIP08_AMBA_ID		0x000b6d01
+#define ETM4_AMBA_MASK			0xfffff
+#define HISI_HIP08_CORE_COMMIT_MASK	0x3000
+#define HISI_HIP08_CORE_COMMIT_SHIFT	12
+#define HISI_HIP08_CORE_COMMIT_FULL	0b00
+#define HISI_HIP08_CORE_COMMIT_LVL_1	0b01
+#define HISI_HIP08_CORE_COMMIT_REG	sys_reg(3, 1, 15, 2, 5)
+
+struct etm4_arch_features {
+	void (*arch_callback)(bool enable);
+};
+
+static bool etm4_hisi_match_pid(unsigned int id)
+{
+	return (id & ETM4_AMBA_MASK) == HISI_HIP08_AMBA_ID;
+}
+
+static void etm4_hisi_config_core_commit(bool enable)
+{
+	u8 commit = enable ? HISI_HIP08_CORE_COMMIT_LVL_1 :
+		    HISI_HIP08_CORE_COMMIT_FULL;
+	u64 val;
+
+	/*
+	 * bit 12 and 13 of HISI_HIP08_CORE_COMMIT_REG are used together
+	 * to set core-commit, 2'b00 means cpu is at full speed, 2'b01,
+	 * 2'b10, 2'b11 mean reduce pipeline speed, and 2'b01 means level-1
+	 * speed(minimun value). So bit 12 and 13 should be cleared together.
+	 */
+	val = read_sysreg_s(HISI_HIP08_CORE_COMMIT_REG);
+	val &= ~HISI_HIP08_CORE_COMMIT_MASK;
+	val |= commit << HISI_HIP08_CORE_COMMIT_SHIFT;
+	write_sysreg_s(val, HISI_HIP08_CORE_COMMIT_REG);
+}
+
+static struct etm4_arch_features etm4_features[] = {
+	[ETM4_IMPDEF_HISI_CORE_COMMIT] = {
+		.arch_callback = etm4_hisi_config_core_commit,
+	},
+	{},
+};
+
+static void etm4_enable_arch_specific(struct etmv4_drvdata *drvdata)
+{
+	struct etm4_arch_features *ftr;
+	int bit;
+
+	for_each_set_bit(bit, drvdata->arch_features, ETM4_IMPDEF_FEATURE_MAX) {
+		ftr = &etm4_features[bit];
+
+		if (ftr->arch_callback)
+			ftr->arch_callback(true);
+	}
+}
+
+static void etm4_disable_arch_specific(struct etmv4_drvdata *drvdata)
+{
+	struct etm4_arch_features *ftr;
+	int bit;
+
+	for_each_set_bit(bit, drvdata->arch_features, ETM4_IMPDEF_FEATURE_MAX) {
+		ftr = &etm4_features[bit];
+
+		if (ftr->arch_callback)
+			ftr->arch_callback(false);
+	}
+}
+
+static void etm4_check_arch_features(struct etmv4_drvdata *drvdata,
+				     struct csdev_access *csa)
+{
+	/*
+	 * TRCPIDR* registers are not required for ETMs with system
+	 * instructions. They must be identified by the MIDR+REVIDRs.
+	 * Skip the TRCPID checks for now.
+	 */
+	if (!csa->io_mem)
+		return;
+
+	if (etm4_hisi_match_pid(coresight_get_pid(csa)))
+		set_bit(ETM4_IMPDEF_HISI_CORE_COMMIT, drvdata->arch_features);
+}
+#else
+static void etm4_enable_arch_specific(struct etmv4_drvdata *drvdata)
+{
+}
+
+static void etm4_disable_arch_specific(struct etmv4_drvdata *drvdata)
+{
+}
+
+static void etm4_check_arch_features(struct etmv4_drvdata *drvdata,
+				     struct csdev_access *csa)
+{
+}
+#endif /* CONFIG_ETM4X_IMPDEF_FEATURE */
+
+static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
+{
+	int i, rc;
+	struct etmv4_config *config = &drvdata->config;
+	struct coresight_device *csdev = drvdata->csdev;
+	struct device *etm_dev = &csdev->dev;
+	struct csdev_access *csa = &csdev->access;
+
+
+	etm4_cs_unlock(drvdata, csa);
+	etm4_enable_arch_specific(drvdata);
+
+	etm4_os_unlock(drvdata);
+
+	rc = coresight_claim_device_unlocked(csdev);
+	if (rc)
+		goto done;
+
+	/* Disable the trace unit before programming trace registers */
+	etm4x_relaxed_write32(csa, 0, TRCPRGCTLR);
+
+	/*
+	 * If we use system instructions, we need to synchronize the
+	 * write to the TRCPRGCTLR, before accessing the TRCSTATR.
+	 * See ARM IHI0064F, section
+	 * "4.3.7 Synchronization of register updates"
+	 */
+	if (!csa->io_mem)
+		isb();
+
+	/* wait for TRCSTATR.IDLE to go up */
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
+		dev_err(etm_dev,
+			"timeout while waiting for Idle Trace Status\n");
+	if (drvdata->nr_pe)
+		etm4x_relaxed_write32(csa, config->pe_sel, TRCPROCSELR);
+	etm4x_relaxed_write32(csa, config->cfg, TRCCONFIGR);
+	/* nothing specific implemented */
+	etm4x_relaxed_write32(csa, 0x0, TRCAUXCTLR);
+	etm4x_relaxed_write32(csa, config->eventctrl0, TRCEVENTCTL0R);
+	etm4x_relaxed_write32(csa, config->eventctrl1, TRCEVENTCTL1R);
+	if (drvdata->stallctl)
+		etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
+	etm4x_relaxed_write32(csa, config->ts_ctrl, TRCTSCTLR);
+	etm4x_relaxed_write32(csa, config->syncfreq, TRCSYNCPR);
+	etm4x_relaxed_write32(csa, config->ccctlr, TRCCCCTLR);
+	etm4x_relaxed_write32(csa, config->bb_ctrl, TRCBBCTLR);
+	etm4x_relaxed_write32(csa, drvdata->trcid, TRCTRACEIDR);
+	etm4x_relaxed_write32(csa, config->vinst_ctrl, TRCVICTLR);
+	etm4x_relaxed_write32(csa, config->viiectlr, TRCVIIECTLR);
+	etm4x_relaxed_write32(csa, config->vissctlr, TRCVISSCTLR);
+	if (drvdata->nr_pe_cmp)
+		etm4x_relaxed_write32(csa, config->vipcssctlr, TRCVIPCSSCTLR);
+	for (i = 0; i < drvdata->nrseqstate - 1; i++)
+		etm4x_relaxed_write32(csa, config->seq_ctrl[i], TRCSEQEVRn(i));
+	if (drvdata->nrseqstate) {
+		etm4x_relaxed_write32(csa, config->seq_rst, TRCSEQRSTEVR);
+		etm4x_relaxed_write32(csa, config->seq_state, TRCSEQSTR);
+	}
+	etm4x_relaxed_write32(csa, config->ext_inp, TRCEXTINSELR);
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		etm4x_relaxed_write32(csa, config->cntrldvr[i], TRCCNTRLDVRn(i));
+		etm4x_relaxed_write32(csa, config->cntr_ctrl[i], TRCCNTCTLRn(i));
+		etm4x_relaxed_write32(csa, config->cntr_val[i], TRCCNTVRn(i));
+	}
+
+	/*
+	 * Resource selector pair 0 is always implemented and reserved.  As
+	 * such start at 2.
+	 */
+	for (i = 2; i < drvdata->nr_resource * 2; i++)
+		etm4x_relaxed_write32(csa, config->res_ctrl[i], TRCRSCTLRn(i));
+
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		/* always clear status bit on restart if using single-shot */
+		if (config->ss_ctrl[i] || config->ss_pe_cmp[i])
+			config->ss_status[i] &= ~TRCSSCSRn_STATUS;
+		etm4x_relaxed_write32(csa, config->ss_ctrl[i], TRCSSCCRn(i));
+		etm4x_relaxed_write32(csa, config->ss_status[i], TRCSSCSRn(i));
+		if (etm4x_sspcicrn_present(drvdata, i))
+			etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
+	}
+	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+		etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
+		etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
+	}
+	for (i = 0; i < drvdata->numcidc; i++)
+		etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
+	etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
+	if (drvdata->numcidc > 4)
+		etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
+
+	for (i = 0; i < drvdata->numvmidc; i++)
+		etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
+	etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
+	if (drvdata->numvmidc > 4)
+		etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
+
+	if (!drvdata->skip_power_up) {
+		u32 trcpdcr = etm4x_relaxed_read32(csa, TRCPDCR);
+
+		/*
+		 * Request to keep the trace unit powered and also
+		 * emulation of powerdown
+		 */
+		etm4x_relaxed_write32(csa, trcpdcr | TRCPDCR_PU, TRCPDCR);
+	}
+
+	/*
+	 * ETE mandates that the TRCRSR is written to before
+	 * enabling it.
+	 */
+	if (etm4x_is_ete(drvdata))
+		etm4x_relaxed_write32(csa, TRCRSR_TA, TRCRSR);
+
+	etm4x_allow_trace(drvdata);
+	/* Enable the trace unit */
+	etm4x_relaxed_write32(csa, 1, TRCPRGCTLR);
+
+	/* Synchronize the register updates for sysreg access */
+	if (!csa->io_mem)
+		isb();
+
+	/* wait for TRCSTATR.IDLE to go back down to '0' */
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
+		dev_err(etm_dev,
+			"timeout while waiting for Idle Trace Status\n");
+
+	/*
+	 * As recommended by section 4.3.7 ("Synchronization when using the
+	 * memory-mapped interface") of ARM IHI 0064D
+	 */
+	dsb(sy);
+	isb();
+
+done:
+	etm4_cs_lock(drvdata, csa);
+
+	dev_dbg(etm_dev, "cpu: %d enable smp call done: %d\n",
+		drvdata->cpu, rc);
+	return rc;
+}
+
+static void etm4_enable_hw_smp_call(void *info)
+{
+	struct etm4_enable_arg *arg = info;
+
+	if (WARN_ON(!arg))
+		return;
+	arg->rc = etm4_enable_hw(arg->drvdata);
+}
+
+/*
+ * The goal of function etm4_config_timestamp_event() is to configure a
+ * counter that will tell the tracer to emit a timestamp packet when it
+ * reaches zero.  This is done in order to get a more fine grained idea
+ * of when instructions are executed so that they can be correlated
+ * with execution on other CPUs.
+ *
+ * To do this the counter itself is configured to self reload and
+ * TRCRSCTLR1 (always true) used to get the counter to decrement.  From
+ * there a resource selector is configured with the counter and the
+ * timestamp control register to use the resource selector to trigger the
+ * event that will insert a timestamp packet in the stream.
+ */
+static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata)
+{
+	int ctridx, ret = -EINVAL;
+	int counter, rselector;
+	u32 val = 0;
+	struct etmv4_config *config = &drvdata->config;
+
+	/* No point in trying if we don't have at least one counter */
+	if (!drvdata->nr_cntr)
+		goto out;
+
+	/* Find a counter that hasn't been initialised */
+	for (ctridx = 0; ctridx < drvdata->nr_cntr; ctridx++)
+		if (config->cntr_val[ctridx] == 0)
+			break;
+
+	/* All the counters have been configured already, bail out */
+	if (ctridx == drvdata->nr_cntr) {
+		pr_debug("%s: no available counter found\n", __func__);
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	/*
+	 * Searching for an available resource selector to use, starting at
+	 * '2' since every implementation has at least 2 resource selector.
+	 * ETMIDR4 gives the number of resource selector _pairs_,
+	 * hence multiply by 2.
+	 */
+	for (rselector = 2; rselector < drvdata->nr_resource * 2; rselector++)
+		if (!config->res_ctrl[rselector])
+			break;
+
+	if (rselector == drvdata->nr_resource * 2) {
+		pr_debug("%s: no available resource selector found\n",
+			 __func__);
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	/* Remember what counter we used */
+	counter = 1 << ctridx;
+
+	/*
+	 * Initialise original and reload counter value to the smallest
+	 * possible value in order to get as much precision as we can.
+	 */
+	config->cntr_val[ctridx] = 1;
+	config->cntrldvr[ctridx] = 1;
+
+	/* Set the trace counter control register */
+	val =  0x1 << 16	|  /* Bit 16, reload counter automatically */
+	       0x0 << 7		|  /* Select single resource selector */
+	       0x1;		   /* Resource selector 1, i.e always true */
+
+	config->cntr_ctrl[ctridx] = val;
+
+	val = 0x2 << 16		| /* Group 0b0010 - Counter and sequencers */
+	      counter << 0;	  /* Counter to use */
+
+	config->res_ctrl[rselector] = val;
+
+	val = 0x0 << 7		| /* Select single resource selector */
+	      rselector;	  /* Resource selector */
+
+	config->ts_ctrl = val;
+
+	ret = 0;
+out:
+	return ret;
+}
+
+static int etm4_parse_event_config(struct coresight_device *csdev,
+				   struct perf_event *event)
+{
+	int ret = 0;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etmv4_config *config = &drvdata->config;
+	struct perf_event_attr *attr = &event->attr;
+	unsigned long cfg_hash;
+	int preset;
+
+	/* Clear configuration from previous run */
+	memset(config, 0, sizeof(struct etmv4_config));
+
+	if (attr->exclude_kernel)
+		config->mode = ETM_MODE_EXCL_KERN;
+
+	if (attr->exclude_user)
+		config->mode = ETM_MODE_EXCL_USER;
+
+	/* Always start from the default config */
+	etm4_set_default_config(config);
+
+	/* Configure filters specified on the perf cmd line, if any. */
+	ret = etm4_set_event_filters(drvdata, event);
+	if (ret)
+		goto out;
+
+	/* Go from generic option to ETMv4 specifics */
+	if (attr->config & BIT(ETM_OPT_CYCACC)) {
+		config->cfg |= TRCCONFIGR_CCI;
+		/* TRM: Must program this for cycacc to work */
+		config->ccctlr = ETM_CYC_THRESHOLD_DEFAULT;
+	}
+	if (attr->config & BIT(ETM_OPT_TS)) {
+		/*
+		 * Configure timestamps to be emitted at regular intervals in
+		 * order to correlate instructions executed on different CPUs
+		 * (CPU-wide trace scenarios).
+		 */
+		ret = etm4_config_timestamp_event(drvdata);
+
+		/*
+		 * No need to go further if timestamp intervals can't
+		 * be configured.
+		 */
+		if (ret)
+			goto out;
+
+		/* bit[11], Global timestamp tracing bit */
+		config->cfg |= TRCCONFIGR_TS;
+	}
+
+	/* Only trace contextID when runs in root PID namespace */
+	if ((attr->config & BIT(ETM_OPT_CTXTID)) &&
+	    task_is_in_init_pid_ns(current))
+		/* bit[6], Context ID tracing bit */
+		config->cfg |= TRCCONFIGR_CID;
+
+	/*
+	 * If set bit ETM_OPT_CTXTID2 in perf config, this asks to trace VMID
+	 * for recording CONTEXTIDR_EL2.  Do not enable VMID tracing if the
+	 * kernel is not running in EL2.
+	 */
+	if (attr->config & BIT(ETM_OPT_CTXTID2)) {
+		if (!is_kernel_in_hyp_mode()) {
+			ret = -EINVAL;
+			goto out;
+		}
+		/* Only trace virtual contextID when runs in root PID namespace */
+		if (task_is_in_init_pid_ns(current))
+			config->cfg |= TRCCONFIGR_VMID | TRCCONFIGR_VMIDOPT;
+	}
+
+	/* return stack - enable if selected and supported */
+	if ((attr->config & BIT(ETM_OPT_RETSTK)) && drvdata->retstack)
+		/* bit[12], Return stack enable bit */
+		config->cfg |= TRCCONFIGR_RS;
+
+	/*
+	 * Set any selected configuration and preset.
+	 *
+	 * This extracts the values of PMU_FORMAT_ATTR(configid) and PMU_FORMAT_ATTR(preset)
+	 * in the perf attributes defined in coresight-etm-perf.c.
+	 * configid uses bits 63:32 of attr->config2, preset uses bits 3:0 of attr->config.
+	 * A zero configid means no configuration active, preset = 0 means no preset selected.
+	 */
+	if (attr->config2 & GENMASK_ULL(63, 32)) {
+		cfg_hash = (u32)(attr->config2 >> 32);
+		preset = attr->config & 0xF;
+		ret = cscfg_csdev_enable_active_config(csdev, cfg_hash, preset);
+	}
+
+	/* branch broadcast - enable if selected and supported */
+	if (attr->config & BIT(ETM_OPT_BRANCH_BROADCAST)) {
+		if (!drvdata->trcbb) {
+			/*
+			 * Missing BB support could cause silent decode errors
+			 * so fail to open if it's not supported.
+			 */
+			ret = -EINVAL;
+			goto out;
+		} else {
+			config->cfg |= BIT(ETM4_CFG_BIT_BB);
+		}
+	}
+
+out:
+	return ret;
+}
+
+static int etm4_enable_perf(struct coresight_device *csdev,
+			    struct perf_event *event)
+{
+	int ret = 0, trace_id;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Configure the tracer based on the session's specifics */
+	ret = etm4_parse_event_config(csdev, event);
+	if (ret)
+		goto out;
+
+	/*
+	 * perf allocates cpu ids as part of _setup_aux() - device needs to use
+	 * the allocated ID. This reads the current version without allocation.
+	 *
+	 * This does not use the trace id lock to prevent lock_dep issues
+	 * with perf locks - we know the ID cannot change until perf shuts down
+	 * the session
+	 */
+	trace_id = coresight_trace_id_read_cpu_id(drvdata->cpu);
+	if (!IS_VALID_CS_TRACE_ID(trace_id)) {
+		dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n",
+			dev_name(&drvdata->csdev->dev), drvdata->cpu);
+		ret = -EINVAL;
+		goto out;
+	}
+	drvdata->trcid = (u8)trace_id;
+
+	/* And enable it */
+	ret = etm4_enable_hw(drvdata);
+
+out:
+	return ret;
+}
+
+static int etm4_enable_sysfs(struct coresight_device *csdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etm4_enable_arg arg = { };
+	unsigned long cfg_hash;
+	int ret, preset;
+
+	/* enable any config activated by configfs */
+	cscfg_config_sysfs_get_active_cfg(&cfg_hash, &preset);
+	if (cfg_hash) {
+		ret = cscfg_csdev_enable_active_config(csdev, cfg_hash, preset);
+		if (ret)
+			return ret;
+	}
+
+	spin_lock(&drvdata->spinlock);
+
+	/* sysfs needs to read and allocate a trace ID */
+	ret = etm4_read_alloc_trace_id(drvdata);
+	if (ret < 0)
+		goto unlock_sysfs_enable;
+
+	/*
+	 * Executing etm4_enable_hw on the cpu whose ETM is being enabled
+	 * ensures that register writes occur when cpu is powered.
+	 */
+	arg.drvdata = drvdata;
+	ret = smp_call_function_single(drvdata->cpu,
+				       etm4_enable_hw_smp_call, &arg, 1);
+	if (!ret)
+		ret = arg.rc;
+	if (!ret)
+		drvdata->sticky_enable = true;
+
+	if (ret)
+		etm4_release_trace_id(drvdata);
+
+unlock_sysfs_enable:
+	spin_unlock(&drvdata->spinlock);
+
+	if (!ret)
+		dev_dbg(&csdev->dev, "ETM tracing enabled\n");
+	return ret;
+}
+
+static int etm4_enable(struct coresight_device *csdev, struct perf_event *event,
+		       enum cs_mode mode)
+{
+	int ret;
+	u32 val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode);
+
+	/* Someone is already using the tracer */
+	if (val)
+		return -EBUSY;
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = etm4_enable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = etm4_enable_perf(csdev, event);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	/* The tracer didn't start */
+	if (ret)
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+
+	return ret;
+}
+
+static void etm4_disable_hw(void *info)
+{
+	u32 control;
+	struct etmv4_drvdata *drvdata = info;
+	struct etmv4_config *config = &drvdata->config;
+	struct coresight_device *csdev = drvdata->csdev;
+	struct device *etm_dev = &csdev->dev;
+	struct csdev_access *csa = &csdev->access;
+	int i;
+
+	etm4_cs_unlock(drvdata, csa);
+	etm4_disable_arch_specific(drvdata);
+
+	if (!drvdata->skip_power_up) {
+		/* power can be removed from the trace unit now */
+		control = etm4x_relaxed_read32(csa, TRCPDCR);
+		control &= ~TRCPDCR_PU;
+		etm4x_relaxed_write32(csa, control, TRCPDCR);
+	}
+
+	control = etm4x_relaxed_read32(csa, TRCPRGCTLR);
+
+	/* EN, bit[0] Trace unit enable bit */
+	control &= ~0x1;
+
+	/*
+	 * If the CPU supports v8.4 Trace filter Control,
+	 * set the ETM to trace prohibited region.
+	 */
+	etm4x_prohibit_trace(drvdata);
+	/*
+	 * Make sure everything completes before disabling, as recommended
+	 * by section 7.3.77 ("TRCVICTLR, ViewInst Main Control Register,
+	 * SSTATUS") of ARM IHI 0064D
+	 */
+	dsb(sy);
+	isb();
+	/* Trace synchronization barrier, is a nop if not supported */
+	tsb_csync();
+	etm4x_relaxed_write32(csa, control, TRCPRGCTLR);
+
+	/* wait for TRCSTATR.PMSTABLE to go to '1' */
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1))
+		dev_err(etm_dev,
+			"timeout while waiting for PM stable Trace Status\n");
+	/* read the status of the single shot comparators */
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		config->ss_status[i] =
+			etm4x_relaxed_read32(csa, TRCSSCSRn(i));
+	}
+
+	/* read back the current counter values */
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		config->cntr_val[i] =
+			etm4x_relaxed_read32(csa, TRCCNTVRn(i));
+	}
+
+	coresight_disclaim_device_unlocked(csdev);
+	etm4_cs_lock(drvdata, csa);
+
+	dev_dbg(&drvdata->csdev->dev,
+		"cpu: %d disable smp call done\n", drvdata->cpu);
+}
+
+static int etm4_disable_perf(struct coresight_device *csdev,
+			     struct perf_event *event)
+{
+	u32 control;
+	struct etm_filters *filters = event->hw.addr_filters;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_event_attr *attr = &event->attr;
+
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
+		return -EINVAL;
+
+	etm4_disable_hw(drvdata);
+	/*
+	 * The config_id occupies bits 63:32 of the config2 perf event attr
+	 * field. If this is non-zero then we will have enabled a config.
+	 */
+	if (attr->config2 & GENMASK_ULL(63, 32))
+		cscfg_csdev_disable_active_config(csdev);
+
+	/*
+	 * Check if the start/stop logic was active when the unit was stopped.
+	 * That way we can re-enable the start/stop logic when the process is
+	 * scheduled again.  Configuration of the start/stop logic happens in
+	 * function etm4_set_event_filters().
+	 */
+	control = etm4x_relaxed_read32(&csdev->access, TRCVICTLR);
+	/* TRCVICTLR::SSSTATUS, bit[9] */
+	filters->ssstatus = (control & BIT(9));
+
+	/*
+	 * perf will release trace ids when _free_aux() is
+	 * called at the end of the session.
+	 */
+
+	return 0;
+}
+
+static void etm4_disable_sysfs(struct coresight_device *csdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * Taking hotplug lock here protects from clocks getting disabled
+	 * with tracing being left on (crash scenario) if user disable occurs
+	 * after cpu online mask indicates the cpu is offline but before the
+	 * DYING hotplug callback is serviced by the ETM driver.
+	 */
+	cpus_read_lock();
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * Executing etm4_disable_hw on the cpu whose ETM is being disabled
+	 * ensures that register writes occur when cpu is powered.
+	 */
+	smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1);
+
+	spin_unlock(&drvdata->spinlock);
+	cpus_read_unlock();
+
+	/*
+	 * we only release trace IDs when resetting sysfs.
+	 * This permits sysfs users to read the trace ID after the trace
+	 * session has completed. This maintains operational behaviour with
+	 * prior trace id allocation method
+	 */
+
+	dev_dbg(&csdev->dev, "ETM tracing disabled\n");
+}
+
+static void etm4_disable(struct coresight_device *csdev,
+			 struct perf_event *event)
+{
+	enum cs_mode mode;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * For as long as the tracer isn't disabled another entity can't
+	 * change its status.  As such we can read the status here without
+	 * fearing it will change under us.
+	 */
+	mode = local_read(&drvdata->mode);
+
+	switch (mode) {
+	case CS_MODE_DISABLED:
+		break;
+	case CS_MODE_SYSFS:
+		etm4_disable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		etm4_disable_perf(csdev, event);
+		break;
+	}
+
+	if (mode)
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+}
+
+static const struct coresight_ops_source etm4_source_ops = {
+	.cpu_id		= etm4_cpu_id,
+	.enable		= etm4_enable,
+	.disable	= etm4_disable,
+};
+
+static const struct coresight_ops etm4_cs_ops = {
+	.source_ops	= &etm4_source_ops,
+};
+
+static inline bool cpu_supports_sysreg_trace(void)
+{
+	u64 dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+
+	return ((dfr0 >> ID_AA64DFR0_EL1_TraceVer_SHIFT) & 0xfUL) > 0;
+}
+
+static bool etm4_init_sysreg_access(struct etmv4_drvdata *drvdata,
+				    struct csdev_access *csa)
+{
+	u32 devarch;
+
+	if (!cpu_supports_sysreg_trace())
+		return false;
+
+	/*
+	 * ETMs implementing sysreg access must implement TRCDEVARCH.
+	 */
+	devarch = read_etm4x_sysreg_const_offset(TRCDEVARCH);
+	switch (devarch & ETM_DEVARCH_ID_MASK) {
+	case ETM_DEVARCH_ETMv4x_ARCH:
+		*csa = (struct csdev_access) {
+			.io_mem	= false,
+			.read	= etm4x_sysreg_read,
+			.write	= etm4x_sysreg_write,
+		};
+		break;
+	case ETM_DEVARCH_ETE_ARCH:
+		*csa = (struct csdev_access) {
+			.io_mem	= false,
+			.read	= ete_sysreg_read,
+			.write	= ete_sysreg_write,
+		};
+		break;
+	default:
+		return false;
+	}
+
+	drvdata->arch = etm_devarch_to_arch(devarch);
+	return true;
+}
+
+static bool is_devtype_cpu_trace(void __iomem *base)
+{
+	u32 devtype = readl(base + TRCDEVTYPE);
+
+	return (devtype == CS_DEVTYPE_PE_TRACE);
+}
+
+static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata,
+				   struct csdev_access *csa)
+{
+	u32 devarch = readl_relaxed(drvdata->base + TRCDEVARCH);
+
+	if (!is_coresight_device(drvdata->base) || !is_devtype_cpu_trace(drvdata->base))
+		return false;
+
+	/*
+	 * All ETMs must implement TRCDEVARCH to indicate that
+	 * the component is an ETMv4. Even though TRCIDR1 also
+	 * contains the information, it is part of the "Trace"
+	 * register and must be accessed with the OSLK cleared,
+	 * with MMIO. But we cannot touch the OSLK until we are
+	 * sure this is an ETM. So rely only on the TRCDEVARCH.
+	 */
+	if ((devarch & ETM_DEVARCH_ID_MASK) != ETM_DEVARCH_ETMv4x_ARCH) {
+		pr_warn_once("TRCDEVARCH doesn't match ETMv4 architecture\n");
+		return false;
+	}
+
+	drvdata->arch = etm_devarch_to_arch(devarch);
+	*csa = CSDEV_ACCESS_IOMEM(drvdata->base);
+	return true;
+}
+
+static bool etm4_init_csdev_access(struct etmv4_drvdata *drvdata,
+				   struct csdev_access *csa)
+{
+	/*
+	 * Always choose the memory mapped io, if there is
+	 * a memory map to prevent sysreg access on broken
+	 * systems.
+	 */
+	if (drvdata->base)
+		return etm4_init_iomem_access(drvdata, csa);
+
+	if (etm4_init_sysreg_access(drvdata, csa))
+		return true;
+
+	return false;
+}
+
+static void cpu_detect_trace_filtering(struct etmv4_drvdata *drvdata)
+{
+	u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+	u64 trfcr;
+
+	drvdata->trfcr = 0;
+	if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT))
+		return;
+
+	/*
+	 * If the CPU supports v8.4 SelfHosted Tracing, enable
+	 * tracing at the kernel EL and EL0, forcing to use the
+	 * virtual time as the timestamp.
+	 */
+	trfcr = (TRFCR_ELx_TS_VIRTUAL |
+		 TRFCR_ELx_ExTRE |
+		 TRFCR_ELx_E0TRE);
+
+	/* If we are running at EL2, allow tracing the CONTEXTIDR_EL2. */
+	if (is_kernel_in_hyp_mode())
+		trfcr |= TRFCR_EL2_CX;
+
+	drvdata->trfcr = trfcr;
+}
+
+static void etm4_init_arch_data(void *info)
+{
+	u32 etmidr0;
+	u32 etmidr2;
+	u32 etmidr3;
+	u32 etmidr4;
+	u32 etmidr5;
+	struct etm4_init_arg *init_arg = info;
+	struct etmv4_drvdata *drvdata;
+	struct csdev_access *csa;
+	int i;
+
+	drvdata = dev_get_drvdata(init_arg->dev);
+	csa = init_arg->csa;
+
+	/*
+	 * If we are unable to detect the access mechanism,
+	 * or unable to detect the trace unit type, fail
+	 * early.
+	 */
+	if (!etm4_init_csdev_access(drvdata, csa))
+		return;
+
+	/* Detect the support for OS Lock before we actually use it */
+	etm_detect_os_lock(drvdata, csa);
+
+	/* Make sure all registers are accessible */
+	etm4_os_unlock_csa(drvdata, csa);
+	etm4_cs_unlock(drvdata, csa);
+
+	etm4_check_arch_features(drvdata, csa);
+
+	/* find all capabilities of the tracing unit */
+	etmidr0 = etm4x_relaxed_read32(csa, TRCIDR0);
+
+	/* INSTP0, bits[2:1] P0 tracing support field */
+	drvdata->instrp0 = !!(FIELD_GET(TRCIDR0_INSTP0_MASK, etmidr0) == 0b11);
+	/* TRCBB, bit[5] Branch broadcast tracing support bit */
+	drvdata->trcbb = !!(etmidr0 & TRCIDR0_TRCBB);
+	/* TRCCOND, bit[6] Conditional instruction tracing support bit */
+	drvdata->trccond = !!(etmidr0 & TRCIDR0_TRCCOND);
+	/* TRCCCI, bit[7] Cycle counting instruction bit */
+	drvdata->trccci = !!(etmidr0 & TRCIDR0_TRCCCI);
+	/* RETSTACK, bit[9] Return stack bit */
+	drvdata->retstack = !!(etmidr0 & TRCIDR0_RETSTACK);
+	/* NUMEVENT, bits[11:10] Number of events field */
+	drvdata->nr_event = FIELD_GET(TRCIDR0_NUMEVENT_MASK, etmidr0);
+	/* QSUPP, bits[16:15] Q element support field */
+	drvdata->q_support = FIELD_GET(TRCIDR0_QSUPP_MASK, etmidr0);
+	/* TSSIZE, bits[28:24] Global timestamp size field */
+	drvdata->ts_size = FIELD_GET(TRCIDR0_TSSIZE_MASK, etmidr0);
+
+	/* maximum size of resources */
+	etmidr2 = etm4x_relaxed_read32(csa, TRCIDR2);
+	/* CIDSIZE, bits[9:5] Indicates the Context ID size */
+	drvdata->ctxid_size = FIELD_GET(TRCIDR2_CIDSIZE_MASK, etmidr2);
+	/* VMIDSIZE, bits[14:10] Indicates the VMID size */
+	drvdata->vmid_size = FIELD_GET(TRCIDR2_VMIDSIZE_MASK, etmidr2);
+	/* CCSIZE, bits[28:25] size of the cycle counter in bits minus 12 */
+	drvdata->ccsize = FIELD_GET(TRCIDR2_CCSIZE_MASK, etmidr2);
+
+	etmidr3 = etm4x_relaxed_read32(csa, TRCIDR3);
+	/* CCITMIN, bits[11:0] minimum threshold value that can be programmed */
+	drvdata->ccitmin = FIELD_GET(TRCIDR3_CCITMIN_MASK, etmidr3);
+	/* EXLEVEL_S, bits[19:16] Secure state instruction tracing */
+	drvdata->s_ex_level = FIELD_GET(TRCIDR3_EXLEVEL_S_MASK, etmidr3);
+	drvdata->config.s_ex_level = drvdata->s_ex_level;
+	/* EXLEVEL_NS, bits[23:20] Non-secure state instruction tracing */
+	drvdata->ns_ex_level = FIELD_GET(TRCIDR3_EXLEVEL_NS_MASK, etmidr3);
+	/*
+	 * TRCERR, bit[24] whether a trace unit can trace a
+	 * system error exception.
+	 */
+	drvdata->trc_error = !!(etmidr3 & TRCIDR3_TRCERR);
+	/* SYNCPR, bit[25] implementation has a fixed synchronization period? */
+	drvdata->syncpr = !!(etmidr3 & TRCIDR3_SYNCPR);
+	/* STALLCTL, bit[26] is stall control implemented? */
+	drvdata->stallctl = !!(etmidr3 & TRCIDR3_STALLCTL);
+	/* SYSSTALL, bit[27] implementation can support stall control? */
+	drvdata->sysstall = !!(etmidr3 & TRCIDR3_SYSSTALL);
+	/*
+	 * NUMPROC - the number of PEs available for tracing, 5bits
+	 *         = TRCIDR3.bits[13:12]bits[30:28]
+	 *  bits[4:3] = TRCIDR3.bits[13:12] (since etm-v4.2, otherwise RES0)
+	 *  bits[3:0] = TRCIDR3.bits[30:28]
+	 */
+	drvdata->nr_pe =  (FIELD_GET(TRCIDR3_NUMPROC_HI_MASK, etmidr3) << 3) |
+			   FIELD_GET(TRCIDR3_NUMPROC_LO_MASK, etmidr3);
+	/* NOOVERFLOW, bit[31] is trace overflow prevention supported */
+	drvdata->nooverflow = !!(etmidr3 & TRCIDR3_NOOVERFLOW);
+
+	/* number of resources trace unit supports */
+	etmidr4 = etm4x_relaxed_read32(csa, TRCIDR4);
+	/* NUMACPAIRS, bits[0:3] number of addr comparator pairs for tracing */
+	drvdata->nr_addr_cmp = FIELD_GET(TRCIDR4_NUMACPAIRS_MASK, etmidr4);
+	/* NUMPC, bits[15:12] number of PE comparator inputs for tracing */
+	drvdata->nr_pe_cmp = FIELD_GET(TRCIDR4_NUMPC_MASK, etmidr4);
+	/*
+	 * NUMRSPAIR, bits[19:16]
+	 * The number of resource pairs conveyed by the HW starts at 0, i.e a
+	 * value of 0x0 indicate 1 resource pair, 0x1 indicate two and so on.
+	 * As such add 1 to the value of NUMRSPAIR for a better representation.
+	 *
+	 * For ETM v4.3 and later, 0x0 means 0, and no pairs are available -
+	 * the default TRUE and FALSE resource selectors are omitted.
+	 * Otherwise for values 0x1 and above the number is N + 1 as per v4.2.
+	 */
+	drvdata->nr_resource = FIELD_GET(TRCIDR4_NUMRSPAIR_MASK, etmidr4);
+	if ((drvdata->arch < ETM_ARCH_V4_3) || (drvdata->nr_resource > 0))
+		drvdata->nr_resource += 1;
+	/*
+	 * NUMSSCC, bits[23:20] the number of single-shot
+	 * comparator control for tracing. Read any status regs as these
+	 * also contain RO capability data.
+	 */
+	drvdata->nr_ss_cmp = FIELD_GET(TRCIDR4_NUMSSCC_MASK, etmidr4);
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		drvdata->config.ss_status[i] =
+			etm4x_relaxed_read32(csa, TRCSSCSRn(i));
+	}
+	/* NUMCIDC, bits[27:24] number of Context ID comparators for tracing */
+	drvdata->numcidc = FIELD_GET(TRCIDR4_NUMCIDC_MASK, etmidr4);
+	/* NUMVMIDC, bits[31:28] number of VMID comparators for tracing */
+	drvdata->numvmidc = FIELD_GET(TRCIDR4_NUMVMIDC_MASK, etmidr4);
+
+	etmidr5 = etm4x_relaxed_read32(csa, TRCIDR5);
+	/* NUMEXTIN, bits[8:0] number of external inputs implemented */
+	drvdata->nr_ext_inp = FIELD_GET(TRCIDR5_NUMEXTIN_MASK, etmidr5);
+	/* TRACEIDSIZE, bits[21:16] indicates the trace ID width */
+	drvdata->trcid_size = FIELD_GET(TRCIDR5_TRACEIDSIZE_MASK, etmidr5);
+	/* ATBTRIG, bit[22] implementation can support ATB triggers? */
+	drvdata->atbtrig = !!(etmidr5 & TRCIDR5_ATBTRIG);
+	/*
+	 * LPOVERRIDE, bit[23] implementation supports
+	 * low-power state override
+	 */
+	drvdata->lpoverride = (etmidr5 & TRCIDR5_LPOVERRIDE) && (!drvdata->skip_power_up);
+	/* NUMSEQSTATE, bits[27:25] number of sequencer states implemented */
+	drvdata->nrseqstate = FIELD_GET(TRCIDR5_NUMSEQSTATE_MASK, etmidr5);
+	/* NUMCNTR, bits[30:28] number of counters available for tracing */
+	drvdata->nr_cntr = FIELD_GET(TRCIDR5_NUMCNTR_MASK, etmidr5);
+	etm4_cs_lock(drvdata, csa);
+	cpu_detect_trace_filtering(drvdata);
+}
+
+static inline u32 etm4_get_victlr_access_type(struct etmv4_config *config)
+{
+	return etm4_get_access_type(config) << __bf_shf(TRCVICTLR_EXLEVEL_MASK);
+}
+
+/* Set ELx trace filter access in the TRCVICTLR register */
+static void etm4_set_victlr_access(struct etmv4_config *config)
+{
+	config->vinst_ctrl &= ~TRCVICTLR_EXLEVEL_MASK;
+	config->vinst_ctrl |= etm4_get_victlr_access_type(config);
+}
+
+static void etm4_set_default_config(struct etmv4_config *config)
+{
+	/* disable all events tracing */
+	config->eventctrl0 = 0x0;
+	config->eventctrl1 = 0x0;
+
+	/* disable stalling */
+	config->stall_ctrl = 0x0;
+
+	/* enable trace synchronization every 4096 bytes, if available */
+	config->syncfreq = 0xC;
+
+	/* disable timestamp event */
+	config->ts_ctrl = 0x0;
+
+	/* TRCVICTLR::EVENT = 0x01, select the always on logic */
+	config->vinst_ctrl = FIELD_PREP(TRCVICTLR_EVENT_MASK, 0x01);
+
+	/* TRCVICTLR::EXLEVEL_NS:EXLEVELS: Set kernel / user filtering */
+	etm4_set_victlr_access(config);
+}
+
+static u64 etm4_get_ns_access_type(struct etmv4_config *config)
+{
+	u64 access_type = 0;
+
+	/*
+	 * EXLEVEL_NS, for NonSecure Exception levels.
+	 * The mask here is a generic value and must be
+	 * shifted to the corresponding field for the registers
+	 */
+	if (!is_kernel_in_hyp_mode()) {
+		/* Stay away from hypervisor mode for non-VHE */
+		access_type =  ETM_EXLEVEL_NS_HYP;
+		if (config->mode & ETM_MODE_EXCL_KERN)
+			access_type |= ETM_EXLEVEL_NS_OS;
+	} else if (config->mode & ETM_MODE_EXCL_KERN) {
+		access_type = ETM_EXLEVEL_NS_HYP;
+	}
+
+	if (config->mode & ETM_MODE_EXCL_USER)
+		access_type |= ETM_EXLEVEL_NS_APP;
+
+	return access_type;
+}
+
+/*
+ * Construct the exception level masks for a given config.
+ * This must be shifted to the corresponding register field
+ * for usage.
+ */
+static u64 etm4_get_access_type(struct etmv4_config *config)
+{
+	/* All Secure exception levels are excluded from the trace */
+	return etm4_get_ns_access_type(config) | (u64)config->s_ex_level;
+}
+
+static u64 etm4_get_comparator_access_type(struct etmv4_config *config)
+{
+	return etm4_get_access_type(config) << TRCACATR_EXLEVEL_SHIFT;
+}
+
+static void etm4_set_comparator_filter(struct etmv4_config *config,
+				       u64 start, u64 stop, int comparator)
+{
+	u64 access_type = etm4_get_comparator_access_type(config);
+
+	/* First half of default address comparator */
+	config->addr_val[comparator] = start;
+	config->addr_acc[comparator] = access_type;
+	config->addr_type[comparator] = ETM_ADDR_TYPE_RANGE;
+
+	/* Second half of default address comparator */
+	config->addr_val[comparator + 1] = stop;
+	config->addr_acc[comparator + 1] = access_type;
+	config->addr_type[comparator + 1] = ETM_ADDR_TYPE_RANGE;
+
+	/*
+	 * Configure the ViewInst function to include this address range
+	 * comparator.
+	 *
+	 * @comparator is divided by two since it is the index in the
+	 * etmv4_config::addr_val array but register TRCVIIECTLR deals with
+	 * address range comparator _pairs_.
+	 *
+	 * Therefore:
+	 *	index 0 -> compatator pair 0
+	 *	index 2 -> comparator pair 1
+	 *	index 4 -> comparator pair 2
+	 *	...
+	 *	index 14 -> comparator pair 7
+	 */
+	config->viiectlr |= BIT(comparator / 2);
+}
+
+static void etm4_set_start_stop_filter(struct etmv4_config *config,
+				       u64 address, int comparator,
+				       enum etm_addr_type type)
+{
+	int shift;
+	u64 access_type = etm4_get_comparator_access_type(config);
+
+	/* Configure the comparator */
+	config->addr_val[comparator] = address;
+	config->addr_acc[comparator] = access_type;
+	config->addr_type[comparator] = type;
+
+	/*
+	 * Configure ViewInst Start-Stop control register.
+	 * Addresses configured to start tracing go from bit 0 to n-1,
+	 * while those configured to stop tracing from 16 to 16 + n-1.
+	 */
+	shift = (type == ETM_ADDR_TYPE_START ? 0 : 16);
+	config->vissctlr |= BIT(shift + comparator);
+}
+
+static void etm4_set_default_filter(struct etmv4_config *config)
+{
+	/* Trace everything 'default' filter achieved by no filtering */
+	config->viiectlr = 0x0;
+
+	/*
+	 * TRCVICTLR::SSSTATUS == 1, the start-stop logic is
+	 * in the started state
+	 */
+	config->vinst_ctrl |= TRCVICTLR_SSSTATUS;
+	config->mode |= ETM_MODE_VIEWINST_STARTSTOP;
+
+	/* No start-stop filtering for ViewInst */
+	config->vissctlr = 0x0;
+}
+
+static void etm4_set_default(struct etmv4_config *config)
+{
+	if (WARN_ON_ONCE(!config))
+		return;
+
+	/*
+	 * Make default initialisation trace everything
+	 *
+	 * This is done by a minimum default config sufficient to enable
+	 * full instruction trace - with a default filter for trace all
+	 * achieved by having no filtering.
+	 */
+	etm4_set_default_config(config);
+	etm4_set_default_filter(config);
+}
+
+static int etm4_get_next_comparator(struct etmv4_drvdata *drvdata, u32 type)
+{
+	int nr_comparator, index = 0;
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * nr_addr_cmp holds the number of comparator _pair_, so time 2
+	 * for the total number of comparators.
+	 */
+	nr_comparator = drvdata->nr_addr_cmp * 2;
+
+	/* Go through the tally of comparators looking for a free one. */
+	while (index < nr_comparator) {
+		switch (type) {
+		case ETM_ADDR_TYPE_RANGE:
+			if (config->addr_type[index] == ETM_ADDR_TYPE_NONE &&
+			    config->addr_type[index + 1] == ETM_ADDR_TYPE_NONE)
+				return index;
+
+			/* Address range comparators go in pairs */
+			index += 2;
+			break;
+		case ETM_ADDR_TYPE_START:
+		case ETM_ADDR_TYPE_STOP:
+			if (config->addr_type[index] == ETM_ADDR_TYPE_NONE)
+				return index;
+
+			/* Start/stop address can have odd indexes */
+			index += 1;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/* If we are here all the comparators have been used. */
+	return -ENOSPC;
+}
+
+static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
+				  struct perf_event *event)
+{
+	int i, comparator, ret = 0;
+	u64 address;
+	struct etmv4_config *config = &drvdata->config;
+	struct etm_filters *filters = event->hw.addr_filters;
+
+	if (!filters)
+		goto default_filter;
+
+	/* Sync events with what Perf got */
+	perf_event_addr_filters_sync(event);
+
+	/*
+	 * If there are no filters to deal with simply go ahead with
+	 * the default filter, i.e the entire address range.
+	 */
+	if (!filters->nr_filters)
+		goto default_filter;
+
+	for (i = 0; i < filters->nr_filters; i++) {
+		struct etm_filter *filter = &filters->etm_filter[i];
+		enum etm_addr_type type = filter->type;
+
+		/* See if a comparator is free. */
+		comparator = etm4_get_next_comparator(drvdata, type);
+		if (comparator < 0) {
+			ret = comparator;
+			goto out;
+		}
+
+		switch (type) {
+		case ETM_ADDR_TYPE_RANGE:
+			etm4_set_comparator_filter(config,
+						   filter->start_addr,
+						   filter->stop_addr,
+						   comparator);
+			/*
+			 * TRCVICTLR::SSSTATUS == 1, the start-stop logic is
+			 * in the started state
+			 */
+			config->vinst_ctrl |= TRCVICTLR_SSSTATUS;
+
+			/* No start-stop filtering for ViewInst */
+			config->vissctlr = 0x0;
+			break;
+		case ETM_ADDR_TYPE_START:
+		case ETM_ADDR_TYPE_STOP:
+			/* Get the right start or stop address */
+			address = (type == ETM_ADDR_TYPE_START ?
+				   filter->start_addr :
+				   filter->stop_addr);
+
+			/* Configure comparator */
+			etm4_set_start_stop_filter(config, address,
+						   comparator, type);
+
+			/*
+			 * If filters::ssstatus == 1, trace acquisition was
+			 * started but the process was yanked away before the
+			 * stop address was hit.  As such the start/stop
+			 * logic needs to be re-started so that tracing can
+			 * resume where it left.
+			 *
+			 * The start/stop logic status when a process is
+			 * scheduled out is checked in function
+			 * etm4_disable_perf().
+			 */
+			if (filters->ssstatus)
+				config->vinst_ctrl |= TRCVICTLR_SSSTATUS;
+
+			/* No include/exclude filtering for ViewInst */
+			config->viiectlr = 0x0;
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	goto out;
+
+
+default_filter:
+	etm4_set_default_filter(config);
+
+out:
+	return ret;
+}
+
+void etm4_config_trace_mode(struct etmv4_config *config)
+{
+	u32 mode;
+
+	mode = config->mode;
+	mode &= (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER);
+
+	/* excluding kernel AND user space doesn't make sense */
+	WARN_ON_ONCE(mode == (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER));
+
+	/* nothing to do if neither flags are set */
+	if (!(mode & ETM_MODE_EXCL_KERN) && !(mode & ETM_MODE_EXCL_USER))
+		return;
+
+	etm4_set_victlr_access(config);
+}
+
+static int etm4_online_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return etm4_probe_cpu(cpu);
+
+	if (etmdrvdata[cpu]->boot_enable && !etmdrvdata[cpu]->sticky_enable)
+		coresight_enable(etmdrvdata[cpu]->csdev);
+	return 0;
+}
+
+static int etm4_starting_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	spin_lock(&etmdrvdata[cpu]->spinlock);
+	if (!etmdrvdata[cpu]->os_unlock)
+		etm4_os_unlock(etmdrvdata[cpu]);
+
+	if (local_read(&etmdrvdata[cpu]->mode))
+		etm4_enable_hw(etmdrvdata[cpu]);
+	spin_unlock(&etmdrvdata[cpu]->spinlock);
+	return 0;
+}
+
+static int etm4_dying_cpu(unsigned int cpu)
+{
+	if (!etmdrvdata[cpu])
+		return 0;
+
+	spin_lock(&etmdrvdata[cpu]->spinlock);
+	if (local_read(&etmdrvdata[cpu]->mode))
+		etm4_disable_hw(etmdrvdata[cpu]);
+	spin_unlock(&etmdrvdata[cpu]->spinlock);
+	return 0;
+}
+
+static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
+{
+	int i, ret = 0;
+	struct etmv4_save_state *state;
+	struct coresight_device *csdev = drvdata->csdev;
+	struct csdev_access *csa;
+	struct device *etm_dev;
+
+	if (WARN_ON(!csdev))
+		return -ENODEV;
+
+	etm_dev = &csdev->dev;
+	csa = &csdev->access;
+
+	/*
+	 * As recommended by 3.4.1 ("The procedure when powering down the PE")
+	 * of ARM IHI 0064D
+	 */
+	dsb(sy);
+	isb();
+
+	etm4_cs_unlock(drvdata, csa);
+	/* Lock the OS lock to disable trace and external debugger access */
+	etm4_os_lock(drvdata);
+
+	/* wait for TRCSTATR.PMSTABLE to go up */
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1)) {
+		dev_err(etm_dev,
+			"timeout while waiting for PM Stable Status\n");
+		etm4_os_unlock(drvdata);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	state = drvdata->save_state;
+
+	state->trcprgctlr = etm4x_read32(csa, TRCPRGCTLR);
+	if (drvdata->nr_pe)
+		state->trcprocselr = etm4x_read32(csa, TRCPROCSELR);
+	state->trcconfigr = etm4x_read32(csa, TRCCONFIGR);
+	state->trcauxctlr = etm4x_read32(csa, TRCAUXCTLR);
+	state->trceventctl0r = etm4x_read32(csa, TRCEVENTCTL0R);
+	state->trceventctl1r = etm4x_read32(csa, TRCEVENTCTL1R);
+	if (drvdata->stallctl)
+		state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
+	state->trctsctlr = etm4x_read32(csa, TRCTSCTLR);
+	state->trcsyncpr = etm4x_read32(csa, TRCSYNCPR);
+	state->trcccctlr = etm4x_read32(csa, TRCCCCTLR);
+	state->trcbbctlr = etm4x_read32(csa, TRCBBCTLR);
+	state->trctraceidr = etm4x_read32(csa, TRCTRACEIDR);
+	state->trcqctlr = etm4x_read32(csa, TRCQCTLR);
+
+	state->trcvictlr = etm4x_read32(csa, TRCVICTLR);
+	state->trcviiectlr = etm4x_read32(csa, TRCVIIECTLR);
+	state->trcvissctlr = etm4x_read32(csa, TRCVISSCTLR);
+	if (drvdata->nr_pe_cmp)
+		state->trcvipcssctlr = etm4x_read32(csa, TRCVIPCSSCTLR);
+	state->trcvdctlr = etm4x_read32(csa, TRCVDCTLR);
+	state->trcvdsacctlr = etm4x_read32(csa, TRCVDSACCTLR);
+	state->trcvdarcctlr = etm4x_read32(csa, TRCVDARCCTLR);
+
+	for (i = 0; i < drvdata->nrseqstate - 1; i++)
+		state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i));
+
+	if (drvdata->nrseqstate) {
+		state->trcseqrstevr = etm4x_read32(csa, TRCSEQRSTEVR);
+		state->trcseqstr = etm4x_read32(csa, TRCSEQSTR);
+	}
+	state->trcextinselr = etm4x_read32(csa, TRCEXTINSELR);
+
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		state->trccntrldvr[i] = etm4x_read32(csa, TRCCNTRLDVRn(i));
+		state->trccntctlr[i] = etm4x_read32(csa, TRCCNTCTLRn(i));
+		state->trccntvr[i] = etm4x_read32(csa, TRCCNTVRn(i));
+	}
+
+	for (i = 0; i < drvdata->nr_resource * 2; i++)
+		state->trcrsctlr[i] = etm4x_read32(csa, TRCRSCTLRn(i));
+
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		state->trcssccr[i] = etm4x_read32(csa, TRCSSCCRn(i));
+		state->trcsscsr[i] = etm4x_read32(csa, TRCSSCSRn(i));
+		if (etm4x_sspcicrn_present(drvdata, i))
+			state->trcsspcicr[i] = etm4x_read32(csa, TRCSSPCICRn(i));
+	}
+
+	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+		state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
+		state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
+	}
+
+	/*
+	 * Data trace stream is architecturally prohibited for A profile cores
+	 * so we don't save (or later restore) trcdvcvr and trcdvcmr - As per
+	 * section 1.3.4 ("Possible functional configurations of an ETMv4 trace
+	 * unit") of ARM IHI 0064D.
+	 */
+
+	for (i = 0; i < drvdata->numcidc; i++)
+		state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
+
+	for (i = 0; i < drvdata->numvmidc; i++)
+		state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
+
+	state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
+	if (drvdata->numcidc > 4)
+		state->trccidcctlr1 = etm4x_read32(csa, TRCCIDCCTLR1);
+
+	state->trcvmidcctlr0 = etm4x_read32(csa, TRCVMIDCCTLR0);
+	if (drvdata->numvmidc > 4)
+		state->trcvmidcctlr0 = etm4x_read32(csa, TRCVMIDCCTLR1);
+
+	state->trcclaimset = etm4x_read32(csa, TRCCLAIMCLR);
+
+	if (!drvdata->skip_power_up)
+		state->trcpdcr = etm4x_read32(csa, TRCPDCR);
+
+	/* wait for TRCSTATR.IDLE to go up */
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) {
+		dev_err(etm_dev,
+			"timeout while waiting for Idle Trace Status\n");
+		etm4_os_unlock(drvdata);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	drvdata->state_needs_restore = true;
+
+	/*
+	 * Power can be removed from the trace unit now. We do this to
+	 * potentially save power on systems that respect the TRCPDCR_PU
+	 * despite requesting software to save/restore state.
+	 */
+	if (!drvdata->skip_power_up)
+		etm4x_relaxed_write32(csa, (state->trcpdcr & ~TRCPDCR_PU),
+				      TRCPDCR);
+out:
+	etm4_cs_lock(drvdata, csa);
+	return ret;
+}
+
+static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
+{
+	int ret = 0;
+
+	/* Save the TRFCR irrespective of whether the ETM is ON */
+	if (drvdata->trfcr)
+		drvdata->save_trfcr = read_trfcr();
+	/*
+	 * Save and restore the ETM Trace registers only if
+	 * the ETM is active.
+	 */
+	if (local_read(&drvdata->mode) && drvdata->save_state)
+		ret = __etm4_cpu_save(drvdata);
+	return ret;
+}
+
+static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
+{
+	int i;
+	struct etmv4_save_state *state = drvdata->save_state;
+	struct csdev_access tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base);
+	struct csdev_access *csa = &tmp_csa;
+
+	etm4_cs_unlock(drvdata, csa);
+	etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
+
+	etm4x_relaxed_write32(csa, state->trcprgctlr, TRCPRGCTLR);
+	if (drvdata->nr_pe)
+		etm4x_relaxed_write32(csa, state->trcprocselr, TRCPROCSELR);
+	etm4x_relaxed_write32(csa, state->trcconfigr, TRCCONFIGR);
+	etm4x_relaxed_write32(csa, state->trcauxctlr, TRCAUXCTLR);
+	etm4x_relaxed_write32(csa, state->trceventctl0r, TRCEVENTCTL0R);
+	etm4x_relaxed_write32(csa, state->trceventctl1r, TRCEVENTCTL1R);
+	if (drvdata->stallctl)
+		etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
+	etm4x_relaxed_write32(csa, state->trctsctlr, TRCTSCTLR);
+	etm4x_relaxed_write32(csa, state->trcsyncpr, TRCSYNCPR);
+	etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR);
+	etm4x_relaxed_write32(csa, state->trcbbctlr, TRCBBCTLR);
+	etm4x_relaxed_write32(csa, state->trctraceidr, TRCTRACEIDR);
+	etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR);
+
+	etm4x_relaxed_write32(csa, state->trcvictlr, TRCVICTLR);
+	etm4x_relaxed_write32(csa, state->trcviiectlr, TRCVIIECTLR);
+	etm4x_relaxed_write32(csa, state->trcvissctlr, TRCVISSCTLR);
+	if (drvdata->nr_pe_cmp)
+		etm4x_relaxed_write32(csa, state->trcvipcssctlr, TRCVIPCSSCTLR);
+	etm4x_relaxed_write32(csa, state->trcvdctlr, TRCVDCTLR);
+	etm4x_relaxed_write32(csa, state->trcvdsacctlr, TRCVDSACCTLR);
+	etm4x_relaxed_write32(csa, state->trcvdarcctlr, TRCVDARCCTLR);
+
+	for (i = 0; i < drvdata->nrseqstate - 1; i++)
+		etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i));
+
+	if (drvdata->nrseqstate) {
+		etm4x_relaxed_write32(csa, state->trcseqrstevr, TRCSEQRSTEVR);
+		etm4x_relaxed_write32(csa, state->trcseqstr, TRCSEQSTR);
+	}
+	etm4x_relaxed_write32(csa, state->trcextinselr, TRCEXTINSELR);
+
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		etm4x_relaxed_write32(csa, state->trccntrldvr[i], TRCCNTRLDVRn(i));
+		etm4x_relaxed_write32(csa, state->trccntctlr[i], TRCCNTCTLRn(i));
+		etm4x_relaxed_write32(csa, state->trccntvr[i], TRCCNTVRn(i));
+	}
+
+	for (i = 0; i < drvdata->nr_resource * 2; i++)
+		etm4x_relaxed_write32(csa, state->trcrsctlr[i], TRCRSCTLRn(i));
+
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		etm4x_relaxed_write32(csa, state->trcssccr[i], TRCSSCCRn(i));
+		etm4x_relaxed_write32(csa, state->trcsscsr[i], TRCSSCSRn(i));
+		if (etm4x_sspcicrn_present(drvdata, i))
+			etm4x_relaxed_write32(csa, state->trcsspcicr[i], TRCSSPCICRn(i));
+	}
+
+	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+		etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
+		etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
+	}
+
+	for (i = 0; i < drvdata->numcidc; i++)
+		etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
+
+	for (i = 0; i < drvdata->numvmidc; i++)
+		etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
+
+	etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
+	if (drvdata->numcidc > 4)
+		etm4x_relaxed_write32(csa, state->trccidcctlr1, TRCCIDCCTLR1);
+
+	etm4x_relaxed_write32(csa, state->trcvmidcctlr0, TRCVMIDCCTLR0);
+	if (drvdata->numvmidc > 4)
+		etm4x_relaxed_write32(csa, state->trcvmidcctlr0, TRCVMIDCCTLR1);
+
+	etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
+
+	if (!drvdata->skip_power_up)
+		etm4x_relaxed_write32(csa, state->trcpdcr, TRCPDCR);
+
+	drvdata->state_needs_restore = false;
+
+	/*
+	 * As recommended by section 4.3.7 ("Synchronization when using the
+	 * memory-mapped interface") of ARM IHI 0064D
+	 */
+	dsb(sy);
+	isb();
+
+	/* Unlock the OS lock to re-enable trace and external debug access */
+	etm4_os_unlock(drvdata);
+	etm4_cs_lock(drvdata, csa);
+}
+
+static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
+{
+	if (drvdata->trfcr)
+		write_trfcr(drvdata->save_trfcr);
+	if (drvdata->state_needs_restore)
+		__etm4_cpu_restore(drvdata);
+}
+
+static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
+			      void *v)
+{
+	struct etmv4_drvdata *drvdata;
+	unsigned int cpu = smp_processor_id();
+
+	if (!etmdrvdata[cpu])
+		return NOTIFY_OK;
+
+	drvdata = etmdrvdata[cpu];
+
+	if (WARN_ON_ONCE(drvdata->cpu != cpu))
+		return NOTIFY_BAD;
+
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		if (etm4_cpu_save(drvdata))
+			return NOTIFY_BAD;
+		break;
+	case CPU_PM_EXIT:
+	case CPU_PM_ENTER_FAILED:
+		etm4_cpu_restore(drvdata);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block etm4_cpu_pm_nb = {
+	.notifier_call = etm4_cpu_pm_notify,
+};
+
+/* Setup PM. Deals with error conditions and counts */
+static int __init etm4_pm_setup(void)
+{
+	int ret;
+
+	ret = cpu_pm_register_notifier(&etm4_cpu_pm_nb);
+	if (ret)
+		return ret;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
+					"arm/coresight4:starting",
+					etm4_starting_cpu, etm4_dying_cpu);
+
+	if (ret)
+		goto unregister_notifier;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					"arm/coresight4:online",
+					etm4_online_cpu, NULL);
+
+	/* HP dyn state ID returned in ret on success */
+	if (ret > 0) {
+		hp_online = ret;
+		return 0;
+	}
+
+	/* failed dyn state - remove others */
+	cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
+
+unregister_notifier:
+	cpu_pm_unregister_notifier(&etm4_cpu_pm_nb);
+	return ret;
+}
+
+static void etm4_pm_clear(void)
+{
+	cpu_pm_unregister_notifier(&etm4_cpu_pm_nb);
+	cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
+	if (hp_online) {
+		cpuhp_remove_state_nocalls(hp_online);
+		hp_online = 0;
+	}
+}
+
+static int etm4_add_coresight_dev(struct etm4_init_arg *init_arg)
+{
+	int ret;
+	struct coresight_platform_data *pdata = NULL;
+	struct device *dev = init_arg->dev;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
+	struct coresight_desc desc = { 0 };
+	u8 major, minor;
+	char *type_name;
+
+	if (!drvdata)
+		return -EINVAL;
+
+	desc.access = *init_arg->csa;
+
+	if (!drvdata->arch)
+		return -EINVAL;
+
+	/* TRCPDCR is not accessible with system instructions. */
+	if (!desc.access.io_mem ||
+	    fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
+		drvdata->skip_power_up = true;
+
+	major = ETM_ARCH_MAJOR_VERSION(drvdata->arch);
+	minor = ETM_ARCH_MINOR_VERSION(drvdata->arch);
+
+	if (etm4x_is_ete(drvdata)) {
+		type_name = "ete";
+		/* ETE v1 has major version == 0b101. Adjust this for logging.*/
+		major -= 4;
+	} else {
+		type_name = "etm";
+	}
+
+	desc.name = devm_kasprintf(dev, GFP_KERNEL,
+				   "%s%d", type_name, drvdata->cpu);
+	if (!desc.name)
+		return -ENOMEM;
+
+	etm4_set_default(&drvdata->config);
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+
+	dev->platform_data = pdata;
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+	desc.ops = &etm4_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etmv4_groups;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	ret = etm_perf_symlink(drvdata->csdev, true);
+	if (ret) {
+		coresight_unregister(drvdata->csdev);
+		return ret;
+	}
+
+	/* register with config infrastructure & load any current features */
+	ret = etm4_cscfg_register(drvdata->csdev);
+	if (ret) {
+		coresight_unregister(drvdata->csdev);
+		return ret;
+	}
+
+	etmdrvdata[drvdata->cpu] = drvdata;
+
+	dev_info(&drvdata->csdev->dev, "CPU%d: %s v%d.%d initialized\n",
+		 drvdata->cpu, type_name, major, minor);
+
+	if (boot_enable) {
+		coresight_enable(drvdata->csdev);
+		drvdata->boot_enable = true;
+	}
+
+	return 0;
+}
+
+static int etm4_probe(struct device *dev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
+	struct csdev_access access = { 0 };
+	struct etm4_init_arg init_arg = { 0 };
+	struct etm4_init_arg *delayed;
+
+	if (WARN_ON(!drvdata))
+		return -ENOMEM;
+
+	if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE)
+		pm_save_enable = coresight_loses_context_with_cpu(dev) ?
+			       PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
+
+	if (pm_save_enable != PARAM_PM_SAVE_NEVER) {
+		drvdata->save_state = devm_kmalloc(dev,
+				sizeof(struct etmv4_save_state), GFP_KERNEL);
+		if (!drvdata->save_state)
+			return -ENOMEM;
+	}
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->cpu = coresight_get_cpu(dev);
+	if (drvdata->cpu < 0)
+		return drvdata->cpu;
+
+	init_arg.dev = dev;
+	init_arg.csa = &access;
+
+	/*
+	 * Serialize against CPUHP callbacks to avoid race condition
+	 * between the smp call and saving the delayed probe.
+	 */
+	cpus_read_lock();
+	if (smp_call_function_single(drvdata->cpu,
+				etm4_init_arch_data,  &init_arg, 1)) {
+		/* The CPU was offline, try again once it comes online. */
+		delayed = devm_kmalloc(dev, sizeof(*delayed), GFP_KERNEL);
+		if (!delayed) {
+			cpus_read_unlock();
+			return -ENOMEM;
+		}
+
+		*delayed = init_arg;
+
+		per_cpu(delayed_probe, drvdata->cpu) = delayed;
+
+		cpus_read_unlock();
+		return 0;
+	}
+	cpus_read_unlock();
+
+	return etm4_add_coresight_dev(&init_arg);
+}
+
+static int etm4_probe_amba(struct amba_device *adev, const struct amba_id *id)
+{
+	struct etmv4_drvdata *drvdata;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct resource *res = &adev->res;
+	int ret;
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->base = base;
+	dev_set_drvdata(dev, drvdata);
+	ret = etm4_probe(dev);
+	if (!ret)
+		pm_runtime_put(&adev->dev);
+
+	return ret;
+}
+
+static int etm4_probe_platform_dev(struct platform_device *pdev)
+{
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	struct etmv4_drvdata *drvdata;
+	int ret;
+
+	drvdata = devm_kzalloc(&pdev->dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->pclk = coresight_get_enable_apb_pclk(&pdev->dev);
+	if (IS_ERR(drvdata->pclk))
+		return -ENODEV;
+
+	if (res) {
+		drvdata->base = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(drvdata->base)) {
+			clk_put(drvdata->pclk);
+			return PTR_ERR(drvdata->base);
+		}
+	}
+
+	dev_set_drvdata(&pdev->dev, drvdata);
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	ret = etm4_probe(&pdev->dev);
+
+	pm_runtime_put(&pdev->dev);
+	return ret;
+}
+
+static int etm4_probe_cpu(unsigned int cpu)
+{
+	int ret;
+	struct etm4_init_arg init_arg;
+	struct csdev_access access = { 0 };
+	struct etm4_init_arg *iap = *this_cpu_ptr(&delayed_probe);
+
+	if (!iap)
+		return 0;
+
+	init_arg = *iap;
+	devm_kfree(init_arg.dev, iap);
+	*this_cpu_ptr(&delayed_probe) = NULL;
+
+	ret = pm_runtime_resume_and_get(init_arg.dev);
+	if (ret < 0) {
+		dev_err(init_arg.dev, "Failed to get PM runtime!\n");
+		return 0;
+	}
+
+	init_arg.csa = &access;
+	etm4_init_arch_data(&init_arg);
+
+	etm4_add_coresight_dev(&init_arg);
+
+	pm_runtime_put(init_arg.dev);
+	return 0;
+}
+
+static struct amba_cs_uci_id uci_id_etm4[] = {
+	{
+		/*  ETMv4 UCI data */
+		.devarch	= ETM_DEVARCH_ETMv4x_ARCH,
+		.devarch_mask	= ETM_DEVARCH_ID_MASK,
+		.devtype	= CS_DEVTYPE_PE_TRACE,
+	}
+};
+
+static void clear_etmdrvdata(void *info)
+{
+	int cpu = *(int *)info;
+
+	etmdrvdata[cpu] = NULL;
+	per_cpu(delayed_probe, cpu) = NULL;
+}
+
+static void etm4_remove_dev(struct etmv4_drvdata *drvdata)
+{
+	bool had_delayed_probe;
+	/*
+	 * Taking hotplug lock here to avoid racing between etm4_remove_dev()
+	 * and CPU hotplug call backs.
+	 */
+	cpus_read_lock();
+
+	had_delayed_probe = per_cpu(delayed_probe, drvdata->cpu);
+
+	/*
+	 * The readers for etmdrvdata[] are CPU hotplug call backs
+	 * and PM notification call backs. Change etmdrvdata[i] on
+	 * CPU i ensures these call backs has consistent view
+	 * inside one call back function.
+	 */
+	if (smp_call_function_single(drvdata->cpu, clear_etmdrvdata, &drvdata->cpu, 1))
+		clear_etmdrvdata(&drvdata->cpu);
+
+	cpus_read_unlock();
+
+	if (!had_delayed_probe) {
+		etm_perf_symlink(drvdata->csdev, false);
+		cscfg_unregister_csdev(drvdata->csdev);
+		coresight_unregister(drvdata->csdev);
+	}
+}
+
+static void etm4_remove_amba(struct amba_device *adev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	if (drvdata)
+		etm4_remove_dev(drvdata);
+}
+
+static int etm4_remove_platform_dev(struct platform_device *pdev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(&pdev->dev);
+
+	if (drvdata)
+		etm4_remove_dev(drvdata);
+	pm_runtime_disable(&pdev->dev);
+
+	if (drvdata && !IS_ERR_OR_NULL(drvdata->pclk))
+		clk_put(drvdata->pclk);
+
+	return 0;
+}
+
+static const struct amba_id etm4_ids[] = {
+	CS_AMBA_ID(0x000bb95d),			/* Cortex-A53 */
+	CS_AMBA_ID(0x000bb95e),			/* Cortex-A57 */
+	CS_AMBA_ID(0x000bb95a),			/* Cortex-A72 */
+	CS_AMBA_ID(0x000bb959),			/* Cortex-A73 */
+	CS_AMBA_UCI_ID(0x000bb9da, uci_id_etm4),/* Cortex-A35 */
+	CS_AMBA_UCI_ID(0x000bbd05, uci_id_etm4),/* Cortex-A55 */
+	CS_AMBA_UCI_ID(0x000bbd0a, uci_id_etm4),/* Cortex-A75 */
+	CS_AMBA_UCI_ID(0x000bbd0c, uci_id_etm4),/* Neoverse N1 */
+	CS_AMBA_UCI_ID(0x000bbd41, uci_id_etm4),/* Cortex-A78 */
+	CS_AMBA_UCI_ID(0x000f0205, uci_id_etm4),/* Qualcomm Kryo */
+	CS_AMBA_UCI_ID(0x000f0211, uci_id_etm4),/* Qualcomm Kryo */
+	CS_AMBA_UCI_ID(0x000bb802, uci_id_etm4),/* Qualcomm Kryo 385 Cortex-A55 */
+	CS_AMBA_UCI_ID(0x000bb803, uci_id_etm4),/* Qualcomm Kryo 385 Cortex-A75 */
+	CS_AMBA_UCI_ID(0x000bb805, uci_id_etm4),/* Qualcomm Kryo 4XX Cortex-A55 */
+	CS_AMBA_UCI_ID(0x000bb804, uci_id_etm4),/* Qualcomm Kryo 4XX Cortex-A76 */
+	CS_AMBA_UCI_ID(0x000bbd0d, uci_id_etm4),/* Qualcomm Kryo 5XX Cortex-A77 */
+	CS_AMBA_UCI_ID(0x000cc0af, uci_id_etm4),/* Marvell ThunderX2 */
+	CS_AMBA_UCI_ID(0x000b6d01, uci_id_etm4),/* HiSilicon-Hip08 */
+	CS_AMBA_UCI_ID(0x000b6d02, uci_id_etm4),/* HiSilicon-Hip09 */
+	/*
+	 * Match all PIDs with ETM4 DEVARCH. No need for adding any of the new
+	 * CPUs to the list here.
+	 */
+	CS_AMBA_MATCH_ALL_UCI(uci_id_etm4),
+	{},
+};
+
+MODULE_DEVICE_TABLE(amba, etm4_ids);
+
+static struct amba_driver etm4x_amba_driver = {
+	.drv = {
+		.name   = "coresight-etm4x",
+		.owner  = THIS_MODULE,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= etm4_probe_amba,
+	.remove         = etm4_remove_amba,
+	.id_table	= etm4_ids,
+};
+
+#ifdef CONFIG_PM
+static int etm4_runtime_suspend(struct device *dev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata->pclk && !IS_ERR(drvdata->pclk))
+		clk_disable_unprepare(drvdata->pclk);
+
+	return 0;
+}
+
+static int etm4_runtime_resume(struct device *dev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata->pclk && !IS_ERR(drvdata->pclk))
+		clk_prepare_enable(drvdata->pclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops etm4_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(etm4_runtime_suspend, etm4_runtime_resume, NULL)
+};
+
+static const struct of_device_id etm4_sysreg_match[] = {
+	{ .compatible	= "arm,coresight-etm4x-sysreg" },
+	{ .compatible	= "arm,embedded-trace-extension" },
+	{}
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id etm4x_acpi_ids[] = {
+	{"ARMHC500", 0}, /* ARM CoreSight ETM4x */
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, etm4x_acpi_ids);
+#endif
+
+static struct platform_driver etm4_platform_driver = {
+	.probe		= etm4_probe_platform_dev,
+	.remove		= etm4_remove_platform_dev,
+	.driver			= {
+		.name			= "coresight-etm4x",
+		.of_match_table		= etm4_sysreg_match,
+		.acpi_match_table	= ACPI_PTR(etm4x_acpi_ids),
+		.suppress_bind_attrs	= true,
+		.pm			= &etm4_dev_pm_ops,
+	},
+};
+
+static int __init etm4x_init(void)
+{
+	int ret;
+
+	ret = etm4_pm_setup();
+
+	/* etm4_pm_setup() does its own cleanup - exit on error */
+	if (ret)
+		return ret;
+
+	ret = amba_driver_register(&etm4x_amba_driver);
+	if (ret) {
+		pr_err("Error registering etm4x AMBA driver\n");
+		goto clear_pm;
+	}
+
+	ret = platform_driver_register(&etm4_platform_driver);
+	if (!ret)
+		return 0;
+
+	pr_err("Error registering etm4x platform driver\n");
+	amba_driver_unregister(&etm4x_amba_driver);
+
+clear_pm:
+	etm4_pm_clear();
+	return ret;
+}
+
+static void __exit etm4x_exit(void)
+{
+	amba_driver_unregister(&etm4x_amba_driver);
+	platform_driver_unregister(&etm4_platform_driver);
+	etm4_pm_clear();
+}
+
+module_init(etm4x_init);
+module_exit(etm4x_exit);
+
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_AUTHOR("Mathieu Poirier <mathieu.poirier@linaro.org>");
+MODULE_DESCRIPTION("Arm CoreSight Program Flow Trace v4.x driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
new file mode 100644
index 0000000000..a9f19629f3
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -0,0 +1,2603 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/pid_namespace.h>
+#include <linux/pm_runtime.h>
+#include <linux/sysfs.h>
+#include "coresight-etm4x.h"
+#include "coresight-priv.h"
+#include "coresight-syscfg.h"
+
+static int etm4_set_mode_exclude(struct etmv4_drvdata *drvdata, bool exclude)
+{
+	u8 idx;
+	struct etmv4_config *config = &drvdata->config;
+
+	idx = config->addr_idx;
+
+	/*
+	 * TRCACATRn.TYPE bit[1:0]: type of comparison
+	 * the trace unit performs
+	 */
+	if (FIELD_GET(TRCACATRn_TYPE_MASK, config->addr_acc[idx]) == TRCACATRn_TYPE_ADDR) {
+		if (idx % 2 != 0)
+			return -EINVAL;
+
+		/*
+		 * We are performing instruction address comparison. Set the
+		 * relevant bit of ViewInst Include/Exclude Control register
+		 * for corresponding address comparator pair.
+		 */
+		if (config->addr_type[idx] != ETM_ADDR_TYPE_RANGE ||
+		    config->addr_type[idx + 1] != ETM_ADDR_TYPE_RANGE)
+			return -EINVAL;
+
+		if (exclude == true) {
+			/*
+			 * Set exclude bit and unset the include bit
+			 * corresponding to comparator pair
+			 */
+			config->viiectlr |= BIT(idx / 2 + 16);
+			config->viiectlr &= ~BIT(idx / 2);
+		} else {
+			/*
+			 * Set include bit and unset exclude bit
+			 * corresponding to comparator pair
+			 */
+			config->viiectlr |= BIT(idx / 2);
+			config->viiectlr &= ~BIT(idx / 2 + 16);
+		}
+	}
+	return 0;
+}
+
+static ssize_t nr_pe_cmp_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_pe_cmp;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_pe_cmp);
+
+static ssize_t nr_addr_cmp_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_addr_cmp;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_addr_cmp);
+
+static ssize_t nr_cntr_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_cntr;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_cntr);
+
+static ssize_t nr_ext_inp_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_ext_inp;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_ext_inp);
+
+static ssize_t numcidc_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->numcidc;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(numcidc);
+
+static ssize_t numvmidc_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->numvmidc;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(numvmidc);
+
+static ssize_t nrseqstate_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nrseqstate;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nrseqstate);
+
+static ssize_t nr_resource_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_resource;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_resource);
+
+static ssize_t nr_ss_cmp_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_ss_cmp;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_ss_cmp);
+
+static ssize_t reset_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	int i;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	if (val)
+		config->mode = 0x0;
+
+	/* Disable data tracing: do not trace load and store data transfers */
+	config->mode &= ~(ETM_MODE_LOAD | ETM_MODE_STORE);
+	config->cfg &= ~(TRCCONFIGR_INSTP0_LOAD | TRCCONFIGR_INSTP0_STORE);
+
+	/* Disable data value and data address tracing */
+	config->mode &= ~(ETM_MODE_DATA_TRACE_ADDR |
+			   ETM_MODE_DATA_TRACE_VAL);
+	config->cfg &= ~(TRCCONFIGR_DA | TRCCONFIGR_DV);
+
+	/* Disable all events tracing */
+	config->eventctrl0 = 0x0;
+	config->eventctrl1 = 0x0;
+
+	/* Disable timestamp event */
+	config->ts_ctrl = 0x0;
+
+	/* Disable stalling */
+	config->stall_ctrl = 0x0;
+
+	/* Reset trace synchronization period  to 2^8 = 256 bytes*/
+	if (drvdata->syncpr == false)
+		config->syncfreq = 0x8;
+
+	/*
+	 * Enable ViewInst to trace everything with start-stop logic in
+	 * started state. ARM recommends start-stop logic is set before
+	 * each trace run.
+	 */
+	config->vinst_ctrl = FIELD_PREP(TRCVICTLR_EVENT_MASK, 0x01);
+	if (drvdata->nr_addr_cmp > 0) {
+		config->mode |= ETM_MODE_VIEWINST_STARTSTOP;
+		/* SSSTATUS, bit[9] */
+		config->vinst_ctrl |= TRCVICTLR_SSSTATUS;
+	}
+
+	/* No address range filtering for ViewInst */
+	config->viiectlr = 0x0;
+
+	/* No start-stop filtering for ViewInst */
+	config->vissctlr = 0x0;
+	config->vipcssctlr = 0x0;
+
+	/* Disable seq events */
+	for (i = 0; i < drvdata->nrseqstate-1; i++)
+		config->seq_ctrl[i] = 0x0;
+	config->seq_rst = 0x0;
+	config->seq_state = 0x0;
+
+	/* Disable external input events */
+	config->ext_inp = 0x0;
+
+	config->cntr_idx = 0x0;
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		config->cntrldvr[i] = 0x0;
+		config->cntr_ctrl[i] = 0x0;
+		config->cntr_val[i] = 0x0;
+	}
+
+	config->res_idx = 0x0;
+	for (i = 2; i < 2 * drvdata->nr_resource; i++)
+		config->res_ctrl[i] = 0x0;
+
+	config->ss_idx = 0x0;
+	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+		config->ss_ctrl[i] = 0x0;
+		config->ss_pe_cmp[i] = 0x0;
+	}
+
+	config->addr_idx = 0x0;
+	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+		config->addr_val[i] = 0x0;
+		config->addr_acc[i] = 0x0;
+		config->addr_type[i] = ETM_ADDR_TYPE_NONE;
+	}
+
+	config->ctxid_idx = 0x0;
+	for (i = 0; i < drvdata->numcidc; i++)
+		config->ctxid_pid[i] = 0x0;
+
+	config->ctxid_mask0 = 0x0;
+	config->ctxid_mask1 = 0x0;
+
+	config->vmid_idx = 0x0;
+	for (i = 0; i < drvdata->numvmidc; i++)
+		config->vmid_val[i] = 0x0;
+	config->vmid_mask0 = 0x0;
+	config->vmid_mask1 = 0x0;
+
+	spin_unlock(&drvdata->spinlock);
+
+	/* for sysfs - only release trace id when resetting */
+	etm4_release_trace_id(drvdata);
+
+	cscfg_csdev_reset_feats(to_coresight_device(dev));
+
+	return size;
+}
+static DEVICE_ATTR_WO(reset);
+
+static ssize_t mode_show(struct device *dev,
+			 struct device_attribute *attr,
+			 char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->mode;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t mode_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	unsigned long val, mode;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	config->mode = val & ETMv4_MODE_ALL;
+
+	if (drvdata->instrp0 == true) {
+		/* start by clearing instruction P0 field */
+		config->cfg  &= ~TRCCONFIGR_INSTP0_LOAD_STORE;
+		if (config->mode & ETM_MODE_LOAD)
+			/* 0b01 Trace load instructions as P0 instructions */
+			config->cfg  |= TRCCONFIGR_INSTP0_LOAD;
+		if (config->mode & ETM_MODE_STORE)
+			/* 0b10 Trace store instructions as P0 instructions */
+			config->cfg  |= TRCCONFIGR_INSTP0_STORE;
+		if (config->mode & ETM_MODE_LOAD_STORE)
+			/*
+			 * 0b11 Trace load and store instructions
+			 * as P0 instructions
+			 */
+			config->cfg  |= TRCCONFIGR_INSTP0_LOAD_STORE;
+	}
+
+	/* bit[3], Branch broadcast mode */
+	if ((config->mode & ETM_MODE_BB) && (drvdata->trcbb == true))
+		config->cfg |= TRCCONFIGR_BB;
+	else
+		config->cfg &= ~TRCCONFIGR_BB;
+
+	/* bit[4], Cycle counting instruction trace bit */
+	if ((config->mode & ETMv4_MODE_CYCACC) &&
+		(drvdata->trccci == true))
+		config->cfg |= TRCCONFIGR_CCI;
+	else
+		config->cfg &= ~TRCCONFIGR_CCI;
+
+	/* bit[6], Context ID tracing bit */
+	if ((config->mode & ETMv4_MODE_CTXID) && (drvdata->ctxid_size))
+		config->cfg |= TRCCONFIGR_CID;
+	else
+		config->cfg &= ~TRCCONFIGR_CID;
+
+	if ((config->mode & ETM_MODE_VMID) && (drvdata->vmid_size))
+		config->cfg |= TRCCONFIGR_VMID;
+	else
+		config->cfg &= ~TRCCONFIGR_VMID;
+
+	/* bits[10:8], Conditional instruction tracing bit */
+	mode = ETM_MODE_COND(config->mode);
+	if (drvdata->trccond == true) {
+		config->cfg &= ~TRCCONFIGR_COND_MASK;
+		config->cfg |= mode << __bf_shf(TRCCONFIGR_COND_MASK);
+	}
+
+	/* bit[11], Global timestamp tracing bit */
+	if ((config->mode & ETMv4_MODE_TIMESTAMP) && (drvdata->ts_size))
+		config->cfg |= TRCCONFIGR_TS;
+	else
+		config->cfg &= ~TRCCONFIGR_TS;
+
+	/* bit[12], Return stack enable bit */
+	if ((config->mode & ETM_MODE_RETURNSTACK) &&
+					(drvdata->retstack == true))
+		config->cfg |= TRCCONFIGR_RS;
+	else
+		config->cfg &= ~TRCCONFIGR_RS;
+
+	/* bits[14:13], Q element enable field */
+	mode = ETM_MODE_QELEM(config->mode);
+	/* start by clearing QE bits */
+	config->cfg &= ~(TRCCONFIGR_QE_W_COUNTS | TRCCONFIGR_QE_WO_COUNTS);
+	/*
+	 * if supported, Q elements with instruction counts are enabled.
+	 * Always set the low bit for any requested mode. Valid combos are
+	 * 0b00, 0b01 and 0b11.
+	 */
+	if (mode && drvdata->q_support)
+		config->cfg |= TRCCONFIGR_QE_W_COUNTS;
+	/*
+	 * if supported, Q elements with and without instruction
+	 * counts are enabled
+	 */
+	if ((mode & BIT(1)) && (drvdata->q_support & BIT(1)))
+		config->cfg |= TRCCONFIGR_QE_WO_COUNTS;
+
+	/* bit[11], AMBA Trace Bus (ATB) trigger enable bit */
+	if ((config->mode & ETM_MODE_ATB_TRIGGER) &&
+	    (drvdata->atbtrig == true))
+		config->eventctrl1 |= TRCEVENTCTL1R_ATB;
+	else
+		config->eventctrl1 &= ~TRCEVENTCTL1R_ATB;
+
+	/* bit[12], Low-power state behavior override bit */
+	if ((config->mode & ETM_MODE_LPOVERRIDE) &&
+	    (drvdata->lpoverride == true))
+		config->eventctrl1 |= TRCEVENTCTL1R_LPOVERRIDE;
+	else
+		config->eventctrl1 &= ~TRCEVENTCTL1R_LPOVERRIDE;
+
+	/* bit[8], Instruction stall bit */
+	if ((config->mode & ETM_MODE_ISTALL_EN) && (drvdata->stallctl == true))
+		config->stall_ctrl |= TRCSTALLCTLR_ISTALL;
+	else
+		config->stall_ctrl &= ~TRCSTALLCTLR_ISTALL;
+
+	/* bit[10], Prioritize instruction trace bit */
+	if (config->mode & ETM_MODE_INSTPRIO)
+		config->stall_ctrl |= TRCSTALLCTLR_INSTPRIORITY;
+	else
+		config->stall_ctrl &= ~TRCSTALLCTLR_INSTPRIORITY;
+
+	/* bit[13], Trace overflow prevention bit */
+	if ((config->mode & ETM_MODE_NOOVERFLOW) &&
+		(drvdata->nooverflow == true))
+		config->stall_ctrl |= TRCSTALLCTLR_NOOVERFLOW;
+	else
+		config->stall_ctrl &= ~TRCSTALLCTLR_NOOVERFLOW;
+
+	/* bit[9] Start/stop logic control bit */
+	if (config->mode & ETM_MODE_VIEWINST_STARTSTOP)
+		config->vinst_ctrl |= TRCVICTLR_SSSTATUS;
+	else
+		config->vinst_ctrl &= ~TRCVICTLR_SSSTATUS;
+
+	/* bit[10], Whether a trace unit must trace a Reset exception */
+	if (config->mode & ETM_MODE_TRACE_RESET)
+		config->vinst_ctrl |= TRCVICTLR_TRCRESET;
+	else
+		config->vinst_ctrl &= ~TRCVICTLR_TRCRESET;
+
+	/* bit[11], Whether a trace unit must trace a system error exception */
+	if ((config->mode & ETM_MODE_TRACE_ERR) &&
+		(drvdata->trc_error == true))
+		config->vinst_ctrl |= TRCVICTLR_TRCERR;
+	else
+		config->vinst_ctrl &= ~TRCVICTLR_TRCERR;
+
+	if (config->mode & (ETM_MODE_EXCL_KERN | ETM_MODE_EXCL_USER))
+		etm4_config_trace_mode(config);
+
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t pe_show(struct device *dev,
+		       struct device_attribute *attr,
+		       char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->pe_sel;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t pe_store(struct device *dev,
+			struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	if (val > drvdata->nr_pe) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+
+	config->pe_sel = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(pe);
+
+static ssize_t event_show(struct device *dev,
+			  struct device_attribute *attr,
+			  char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->eventctrl0;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t event_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	switch (drvdata->nr_event) {
+	case 0x0:
+		/* EVENT0, bits[7:0] */
+		config->eventctrl0 = val & 0xFF;
+		break;
+	case 0x1:
+		 /* EVENT1, bits[15:8] */
+		config->eventctrl0 = val & 0xFFFF;
+		break;
+	case 0x2:
+		/* EVENT2, bits[23:16] */
+		config->eventctrl0 = val & 0xFFFFFF;
+		break;
+	case 0x3:
+		/* EVENT3, bits[31:24] */
+		config->eventctrl0 = val;
+		break;
+	default:
+		break;
+	}
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(event);
+
+static ssize_t event_instren_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = FIELD_GET(TRCEVENTCTL1R_INSTEN_MASK, config->eventctrl1);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t event_instren_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/* start by clearing all instruction event enable bits */
+	config->eventctrl1 &= ~TRCEVENTCTL1R_INSTEN_MASK;
+	switch (drvdata->nr_event) {
+	case 0x0:
+		/* generate Event element for event 1 */
+		config->eventctrl1 |= val & TRCEVENTCTL1R_INSTEN_1;
+		break;
+	case 0x1:
+		/* generate Event element for event 1 and 2 */
+		config->eventctrl1 |= val & (TRCEVENTCTL1R_INSTEN_0 | TRCEVENTCTL1R_INSTEN_1);
+		break;
+	case 0x2:
+		/* generate Event element for event 1, 2 and 3 */
+		config->eventctrl1 |= val & (TRCEVENTCTL1R_INSTEN_0 |
+					     TRCEVENTCTL1R_INSTEN_1 |
+					     TRCEVENTCTL1R_INSTEN_2);
+		break;
+	case 0x3:
+		/* generate Event element for all 4 events */
+		config->eventctrl1 |= val & (TRCEVENTCTL1R_INSTEN_0 |
+					     TRCEVENTCTL1R_INSTEN_1 |
+					     TRCEVENTCTL1R_INSTEN_2 |
+					     TRCEVENTCTL1R_INSTEN_3);
+		break;
+	default:
+		break;
+	}
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(event_instren);
+
+static ssize_t event_ts_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->ts_ctrl;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t event_ts_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (!drvdata->ts_size)
+		return -EINVAL;
+
+	config->ts_ctrl = val & ETMv4_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(event_ts);
+
+static ssize_t syncfreq_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->syncfreq;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t syncfreq_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (drvdata->syncpr == true)
+		return -EINVAL;
+
+	config->syncfreq = val & ETMv4_SYNC_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(syncfreq);
+
+static ssize_t cyc_threshold_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->ccctlr;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cyc_threshold_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	/* mask off max threshold before checking min value */
+	val &= ETM_CYC_THRESHOLD_MASK;
+	if (val < drvdata->ccitmin)
+		return -EINVAL;
+
+	config->ccctlr = val;
+	return size;
+}
+static DEVICE_ATTR_RW(cyc_threshold);
+
+static ssize_t bb_ctrl_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->bb_ctrl;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t bb_ctrl_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (drvdata->trcbb == false)
+		return -EINVAL;
+	if (!drvdata->nr_addr_cmp)
+		return -EINVAL;
+
+	/*
+	 * Bit[8] controls include(1) / exclude(0), bits[0-7] select
+	 * individual range comparators. If include then at least 1
+	 * range must be selected.
+	 */
+	if ((val & TRCBBCTLR_MODE) && (FIELD_GET(TRCBBCTLR_RANGE_MASK, val) == 0))
+		return -EINVAL;
+
+	config->bb_ctrl = val & (TRCBBCTLR_MODE | TRCBBCTLR_RANGE_MASK);
+	return size;
+}
+static DEVICE_ATTR_RW(bb_ctrl);
+
+static ssize_t event_vinst_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = FIELD_GET(TRCVICTLR_EVENT_MASK, config->vinst_ctrl);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t event_vinst_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	val &= TRCVICTLR_EVENT_MASK >> __bf_shf(TRCVICTLR_EVENT_MASK);
+	config->vinst_ctrl &= ~TRCVICTLR_EVENT_MASK;
+	config->vinst_ctrl |= FIELD_PREP(TRCVICTLR_EVENT_MASK, val);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(event_vinst);
+
+static ssize_t s_exlevel_vinst_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = FIELD_GET(TRCVICTLR_EXLEVEL_S_MASK, config->vinst_ctrl);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t s_exlevel_vinst_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/* clear all EXLEVEL_S bits  */
+	config->vinst_ctrl &= ~TRCVICTLR_EXLEVEL_S_MASK;
+	/* enable instruction tracing for corresponding exception level */
+	val &= drvdata->s_ex_level;
+	config->vinst_ctrl |= val << __bf_shf(TRCVICTLR_EXLEVEL_S_MASK);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(s_exlevel_vinst);
+
+static ssize_t ns_exlevel_vinst_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/* EXLEVEL_NS, bits[23:20] */
+	val = FIELD_GET(TRCVICTLR_EXLEVEL_NS_MASK, config->vinst_ctrl);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t ns_exlevel_vinst_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/* clear EXLEVEL_NS bits  */
+	config->vinst_ctrl &= ~TRCVICTLR_EXLEVEL_NS_MASK;
+	/* enable instruction tracing for corresponding exception level */
+	val &= drvdata->ns_ex_level;
+	config->vinst_ctrl |= val << __bf_shf(TRCVICTLR_EXLEVEL_NS_MASK);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(ns_exlevel_vinst);
+
+static ssize_t addr_idx_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->addr_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nr_addr_cmp * 2)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->addr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_idx);
+
+static ssize_t addr_instdatatype_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	ssize_t len;
+	u8 val, idx;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	val = FIELD_GET(TRCACATRn_TYPE_MASK, config->addr_acc[idx]);
+	len = scnprintf(buf, PAGE_SIZE, "%s\n",
+			val == TRCACATRn_TYPE_ADDR ? "instr" :
+			(val == TRCACATRn_TYPE_DATA_LOAD_ADDR ? "data_load" :
+			(val == TRCACATRn_TYPE_DATA_STORE_ADDR ? "data_store" :
+			"data_load_store")));
+	spin_unlock(&drvdata->spinlock);
+	return len;
+}
+
+static ssize_t addr_instdatatype_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t size)
+{
+	u8 idx;
+	char str[20] = "";
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (strlen(buf) >= 20)
+		return -EINVAL;
+	if (sscanf(buf, "%s", str) != 1)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!strcmp(str, "instr"))
+		/* TYPE, bits[1:0] */
+		config->addr_acc[idx] &= ~TRCACATRn_TYPE_MASK;
+
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_instdatatype);
+
+static ssize_t addr_single_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	idx = config->addr_idx;
+	spin_lock(&drvdata->spinlock);
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	val = (unsigned long)config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_single_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = (u64)val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_SINGLE;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_single);
+
+static ssize_t addr_range_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val1 = (unsigned long)config->addr_val[idx];
+	val2 = (unsigned long)config->addr_val[idx + 1];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t addr_range_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+	int elements, exclude;
+
+	elements = sscanf(buf, "%lx %lx %x", &val1, &val2, &exclude);
+
+	/*  exclude is optional, but need at least two parameter */
+	if (elements < 2)
+		return -EINVAL;
+	/* lower address comparator cannot have a higher address value */
+	if (val1 > val2)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	if (!((config->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (config->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       config->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = (u64)val1;
+	config->addr_type[idx] = ETM_ADDR_TYPE_RANGE;
+	config->addr_val[idx + 1] = (u64)val2;
+	config->addr_type[idx + 1] = ETM_ADDR_TYPE_RANGE;
+	/*
+	 * Program include or exclude control bits for vinst or vdata
+	 * whenever we change addr comparators to ETM_ADDR_TYPE_RANGE
+	 * use supplied value, or default to bit set in 'mode'
+	 */
+	if (elements != 3)
+		exclude = config->mode & ETM_MODE_EXCLUDE;
+	etm4_set_mode_exclude(drvdata, exclude ? true : false);
+
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_range);
+
+static ssize_t addr_start_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = (unsigned long)config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_start_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!drvdata->nr_addr_cmp) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = (u64)val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_START;
+	config->vissctlr |= BIT(idx);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_start);
+
+static ssize_t addr_stop_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = (unsigned long)config->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_stop_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!drvdata->nr_addr_cmp) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+	if (!(config->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	       config->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	config->addr_val[idx] = (u64)val;
+	config->addr_type[idx] = ETM_ADDR_TYPE_STOP;
+	config->vissctlr |= BIT(idx + 16);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_stop);
+
+static ssize_t addr_ctxtype_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	ssize_t len;
+	u8 idx, val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	/* CONTEXTTYPE, bits[3:2] */
+	val = FIELD_GET(TRCACATRn_CONTEXTTYPE_MASK, config->addr_acc[idx]);
+	len = scnprintf(buf, PAGE_SIZE, "%s\n", val == ETM_CTX_NONE ? "none" :
+			(val == ETM_CTX_CTXID ? "ctxid" :
+			(val == ETM_CTX_VMID ? "vmid" : "all")));
+	spin_unlock(&drvdata->spinlock);
+	return len;
+}
+
+static ssize_t addr_ctxtype_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	u8 idx;
+	char str[10] = "";
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (strlen(buf) >= 10)
+		return -EINVAL;
+	if (sscanf(buf, "%s", str) != 1)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	if (!strcmp(str, "none"))
+		/* start by clearing context type bits */
+		config->addr_acc[idx] &= ~TRCACATRn_CONTEXTTYPE_MASK;
+	else if (!strcmp(str, "ctxid")) {
+		/* 0b01 The trace unit performs a Context ID */
+		if (drvdata->numcidc) {
+			config->addr_acc[idx] |= TRCACATRn_CONTEXTTYPE_CTXID;
+			config->addr_acc[idx] &= ~TRCACATRn_CONTEXTTYPE_VMID;
+		}
+	} else if (!strcmp(str, "vmid")) {
+		/* 0b10 The trace unit performs a VMID */
+		if (drvdata->numvmidc) {
+			config->addr_acc[idx] &= ~TRCACATRn_CONTEXTTYPE_CTXID;
+			config->addr_acc[idx] |= TRCACATRn_CONTEXTTYPE_VMID;
+		}
+	} else if (!strcmp(str, "all")) {
+		/*
+		 * 0b11 The trace unit performs a Context ID
+		 * comparison and a VMID
+		 */
+		if (drvdata->numcidc)
+			config->addr_acc[idx] |= TRCACATRn_CONTEXTTYPE_CTXID;
+		if (drvdata->numvmidc)
+			config->addr_acc[idx] |= TRCACATRn_CONTEXTTYPE_VMID;
+	}
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_ctxtype);
+
+static ssize_t addr_context_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	/* context ID comparator bits[6:4] */
+	val = FIELD_GET(TRCACATRn_CONTEXT_MASK, config->addr_acc[idx]);
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_context_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if ((drvdata->numcidc <= 1) && (drvdata->numvmidc <= 1))
+		return -EINVAL;
+	if (val >=  (drvdata->numcidc >= drvdata->numvmidc ?
+		     drvdata->numcidc : drvdata->numvmidc))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	/* clear context ID comparator bits[6:4] */
+	config->addr_acc[idx] &= ~TRCACATRn_CONTEXT_MASK;
+	config->addr_acc[idx] |= val << __bf_shf(TRCACATRn_CONTEXT_MASK);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_context);
+
+static ssize_t addr_exlevel_s_ns_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	val = FIELD_GET(TRCACATRn_EXLEVEL_MASK, config->addr_acc[idx]);
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t addr_exlevel_s_ns_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	if (val & ~(TRCACATRn_EXLEVEL_MASK >> __bf_shf(TRCACATRn_EXLEVEL_MASK)))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	/* clear Exlevel_ns & Exlevel_s bits[14:12, 11:8], bit[15] is res0 */
+	config->addr_acc[idx] &= ~TRCACATRn_EXLEVEL_MASK;
+	config->addr_acc[idx] |= val << __bf_shf(TRCACATRn_EXLEVEL_MASK);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(addr_exlevel_s_ns);
+
+static const char * const addr_type_names[] = {
+	"unused",
+	"single",
+	"range",
+	"start",
+	"stop"
+};
+
+static ssize_t addr_cmp_view_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	u8 idx, addr_type;
+	unsigned long addr_v, addr_v2, addr_ctrl;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+	int size = 0;
+	bool exclude = false;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->addr_idx;
+	addr_v = config->addr_val[idx];
+	addr_ctrl = config->addr_acc[idx];
+	addr_type = config->addr_type[idx];
+	if (addr_type == ETM_ADDR_TYPE_RANGE) {
+		if (idx & 0x1) {
+			idx -= 1;
+			addr_v2 = addr_v;
+			addr_v = config->addr_val[idx];
+		} else {
+			addr_v2 = config->addr_val[idx + 1];
+		}
+		exclude = config->viiectlr & BIT(idx / 2 + 16);
+	}
+	spin_unlock(&drvdata->spinlock);
+	if (addr_type) {
+		size = scnprintf(buf, PAGE_SIZE, "addr_cmp[%i] %s %#lx", idx,
+				 addr_type_names[addr_type], addr_v);
+		if (addr_type == ETM_ADDR_TYPE_RANGE) {
+			size += scnprintf(buf + size, PAGE_SIZE - size,
+					  " %#lx %s", addr_v2,
+					  exclude ? "exclude" : "include");
+		}
+		size += scnprintf(buf + size, PAGE_SIZE - size,
+				  " ctrl(%#lx)\n", addr_ctrl);
+	} else {
+		size = scnprintf(buf, PAGE_SIZE, "addr_cmp[%i] unused\n", idx);
+	}
+	return size;
+}
+static DEVICE_ATTR_RO(addr_cmp_view);
+
+static ssize_t vinst_pe_cmp_start_stop_show(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (!drvdata->nr_pe_cmp)
+		return -EINVAL;
+	val = config->vipcssctlr;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static ssize_t vinst_pe_cmp_start_stop_store(struct device *dev,
+					     struct device_attribute *attr,
+					     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (!drvdata->nr_pe_cmp)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	config->vipcssctlr = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(vinst_pe_cmp_start_stop);
+
+static ssize_t seq_idx_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->seq_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t seq_idx_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nrseqstate - 1)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->seq_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(seq_idx);
+
+static ssize_t seq_state_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->seq_state;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t seq_state_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nrseqstate)
+		return -EINVAL;
+
+	config->seq_state = val;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_state);
+
+static ssize_t seq_event_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->seq_idx;
+	val = config->seq_ctrl[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t seq_event_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->seq_idx;
+	/* Seq control has two masks B[15:8] F[7:0] */
+	config->seq_ctrl[idx] = val & 0xFFFF;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(seq_event);
+
+static ssize_t seq_reset_event_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->seq_rst;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t seq_reset_event_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (!(drvdata->nrseqstate))
+		return -EINVAL;
+
+	config->seq_rst = val & ETMv4_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_reset_event);
+
+static ssize_t cntr_idx_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->cntr_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cntr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nr_cntr)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->cntr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_idx);
+
+static ssize_t cntrldvr_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	val = config->cntrldvr[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cntrldvr_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val > ETM_CNTR_MAX_VAL)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	config->cntrldvr[idx] = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(cntrldvr);
+
+static ssize_t cntr_val_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	val = config->cntr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cntr_val_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val > ETM_CNTR_MAX_VAL)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	config->cntr_val[idx] = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_val);
+
+static ssize_t cntr_ctrl_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	val = config->cntr_ctrl[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t cntr_ctrl_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->cntr_idx;
+	config->cntr_ctrl[idx] = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_ctrl);
+
+static ssize_t res_idx_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->res_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t res_idx_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	/*
+	 * Resource selector pair 0 is always implemented and reserved,
+	 * namely an idx with 0 and 1 is illegal.
+	 */
+	if ((val < 2) || (val >= 2 * drvdata->nr_resource))
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->res_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(res_idx);
+
+static ssize_t res_ctrl_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->res_idx;
+	val = config->res_ctrl[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t res_ctrl_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->res_idx;
+	/* For odd idx pair inversal bit is RES0 */
+	if (idx % 2 != 0)
+		/* PAIRINV, bit[21] */
+		val &= ~TRCRSCTLRn_PAIRINV;
+	config->res_ctrl[idx] = val & (TRCRSCTLRn_PAIRINV |
+				       TRCRSCTLRn_INV |
+				       TRCRSCTLRn_GROUP_MASK |
+				       TRCRSCTLRn_SELECT_MASK);
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(res_ctrl);
+
+static ssize_t sshot_idx_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->ss_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t sshot_idx_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->nr_ss_cmp)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	config->ss_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(sshot_idx);
+
+static ssize_t sshot_ctrl_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->ss_ctrl[config->ss_idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t sshot_ctrl_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->ss_idx;
+	config->ss_ctrl[idx] = FIELD_PREP(TRCSSCCRn_SAC_ARC_RST_MASK, val);
+	/* must clear bit 31 in related status register on programming */
+	config->ss_status[idx] &= ~TRCSSCSRn_STATUS;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(sshot_ctrl);
+
+static ssize_t sshot_status_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->ss_status[config->ss_idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(sshot_status);
+
+static ssize_t sshot_pe_ctrl_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	spin_lock(&drvdata->spinlock);
+	val = config->ss_pe_cmp[config->ss_idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t sshot_pe_ctrl_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->ss_idx;
+	config->ss_pe_cmp[idx] = FIELD_PREP(TRCSSPCICRn_PC_MASK, val);
+	/* must clear bit 31 in related status register on programming */
+	config->ss_status[idx] &= ~TRCSSCSRn_STATUS;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(sshot_pe_ctrl);
+
+static ssize_t ctxid_idx_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->ctxid_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t ctxid_idx_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->numcidc)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->ctxid_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_idx);
+
+static ssize_t ctxid_pid_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->ctxid_idx;
+	val = (unsigned long)config->ctxid_pid[idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t ctxid_pid_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long pid;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * When contextID tracing is enabled the tracers will insert the
+	 * value found in the contextID register in the trace stream.  But if
+	 * a process is in a namespace the PID of that process as seen from the
+	 * namespace won't be what the kernel sees, something that makes the
+	 * feature confusing and can potentially leak kernel only information.
+	 * As such refuse to use the feature if @current is not in the initial
+	 * PID namespace.
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	/*
+	 * only implemented when ctxid tracing is enabled, i.e. at least one
+	 * ctxid comparator is implemented and ctxid is greater than 0 bits
+	 * in length
+	 */
+	if (!drvdata->ctxid_size || !drvdata->numcidc)
+		return -EINVAL;
+	if (kstrtoul(buf, 16, &pid))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = config->ctxid_idx;
+	config->ctxid_pid[idx] = (u64)pid;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_pid);
+
+static ssize_t ctxid_masks_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned long val1, val2;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	val1 = config->ctxid_mask0;
+	val2 = config->ctxid_mask1;
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t ctxid_masks_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 i, j, maskbyte;
+	unsigned long val1, val2, mask;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+	int nr_inputs;
+
+	/*
+	 * Don't use contextID tracing if coming from a PID namespace.  See
+	 * comment in ctxid_pid_store().
+	 */
+	if (task_active_pid_ns(current) != &init_pid_ns)
+		return -EINVAL;
+
+	/*
+	 * only implemented when ctxid tracing is enabled, i.e. at least one
+	 * ctxid comparator is implemented and ctxid is greater than 0 bits
+	 * in length
+	 */
+	if (!drvdata->ctxid_size || !drvdata->numcidc)
+		return -EINVAL;
+	/* one mask if <= 4 comparators, two for up to 8 */
+	nr_inputs = sscanf(buf, "%lx %lx", &val1, &val2);
+	if ((drvdata->numcidc > 4) && (nr_inputs != 2))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	/*
+	 * each byte[0..3] controls mask value applied to ctxid
+	 * comparator[0..3]
+	 */
+	switch (drvdata->numcidc) {
+	case 0x1:
+		/* COMP0, bits[7:0] */
+		config->ctxid_mask0 = val1 & 0xFF;
+		break;
+	case 0x2:
+		/* COMP1, bits[15:8] */
+		config->ctxid_mask0 = val1 & 0xFFFF;
+		break;
+	case 0x3:
+		/* COMP2, bits[23:16] */
+		config->ctxid_mask0 = val1 & 0xFFFFFF;
+		break;
+	case 0x4:
+		 /* COMP3, bits[31:24] */
+		config->ctxid_mask0 = val1;
+		break;
+	case 0x5:
+		/* COMP4, bits[7:0] */
+		config->ctxid_mask0 = val1;
+		config->ctxid_mask1 = val2 & 0xFF;
+		break;
+	case 0x6:
+		/* COMP5, bits[15:8] */
+		config->ctxid_mask0 = val1;
+		config->ctxid_mask1 = val2 & 0xFFFF;
+		break;
+	case 0x7:
+		/* COMP6, bits[23:16] */
+		config->ctxid_mask0 = val1;
+		config->ctxid_mask1 = val2 & 0xFFFFFF;
+		break;
+	case 0x8:
+		/* COMP7, bits[31:24] */
+		config->ctxid_mask0 = val1;
+		config->ctxid_mask1 = val2;
+		break;
+	default:
+		break;
+	}
+	/*
+	 * If software sets a mask bit to 1, it must program relevant byte
+	 * of ctxid comparator value 0x0, otherwise behavior is unpredictable.
+	 * For example, if bit[3] of ctxid_mask0 is 1, we must clear bits[31:24]
+	 * of ctxid comparator0 value (corresponding to byte 0) register.
+	 */
+	mask = config->ctxid_mask0;
+	for (i = 0; i < drvdata->numcidc; i++) {
+		/* mask value of corresponding ctxid comparator */
+		maskbyte = mask & ETMv4_EVENT_MASK;
+		/*
+		 * each bit corresponds to a byte of respective ctxid comparator
+		 * value register
+		 */
+		for (j = 0; j < 8; j++) {
+			if (maskbyte & 1)
+				config->ctxid_pid[i] &= ~(0xFFUL << (j * 8));
+			maskbyte >>= 1;
+		}
+		/* Select the next ctxid comparator mask value */
+		if (i == 3)
+			/* ctxid comparators[4-7] */
+			mask = config->ctxid_mask1;
+		else
+			mask >>= 0x8;
+	}
+
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_masks);
+
+static ssize_t vmid_idx_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	val = config->vmid_idx;
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t vmid_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+	if (val >= drvdata->numvmidc)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	config->vmid_idx = val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(vmid_idx);
+
+static ssize_t vmid_val_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * Don't use virtual contextID tracing if coming from a PID namespace.
+	 * See comment in ctxid_pid_store().
+	 */
+	if (!task_is_in_init_pid_ns(current))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	val = (unsigned long)config->vmid_val[config->vmid_idx];
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t vmid_val_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	unsigned long val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * Don't use virtual contextID tracing if coming from a PID namespace.
+	 * See comment in ctxid_pid_store().
+	 */
+	if (!task_is_in_init_pid_ns(current))
+		return -EINVAL;
+
+	/*
+	 * only implemented when vmid tracing is enabled, i.e. at least one
+	 * vmid comparator is implemented and at least 8 bit vmid size
+	 */
+	if (!drvdata->vmid_size || !drvdata->numvmidc)
+		return -EINVAL;
+	if (kstrtoul(buf, 16, &val))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	config->vmid_val[config->vmid_idx] = (u64)val;
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(vmid_val);
+
+static ssize_t vmid_masks_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	unsigned long val1, val2;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * Don't use virtual contextID tracing if coming from a PID namespace.
+	 * See comment in ctxid_pid_store().
+	 */
+	if (!task_is_in_init_pid_ns(current))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	val1 = config->vmid_mask0;
+	val2 = config->vmid_mask1;
+	spin_unlock(&drvdata->spinlock);
+	return scnprintf(buf, PAGE_SIZE, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t vmid_masks_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 i, j, maskbyte;
+	unsigned long val1, val2, mask;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct etmv4_config *config = &drvdata->config;
+	int nr_inputs;
+
+	/*
+	 * Don't use virtual contextID tracing if coming from a PID namespace.
+	 * See comment in ctxid_pid_store().
+	 */
+	if (!task_is_in_init_pid_ns(current))
+		return -EINVAL;
+
+	/*
+	 * only implemented when vmid tracing is enabled, i.e. at least one
+	 * vmid comparator is implemented and at least 8 bit vmid size
+	 */
+	if (!drvdata->vmid_size || !drvdata->numvmidc)
+		return -EINVAL;
+	/* one mask if <= 4 comparators, two for up to 8 */
+	nr_inputs = sscanf(buf, "%lx %lx", &val1, &val2);
+	if ((drvdata->numvmidc > 4) && (nr_inputs != 2))
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * each byte[0..3] controls mask value applied to vmid
+	 * comparator[0..3]
+	 */
+	switch (drvdata->numvmidc) {
+	case 0x1:
+		/* COMP0, bits[7:0] */
+		config->vmid_mask0 = val1 & 0xFF;
+		break;
+	case 0x2:
+		/* COMP1, bits[15:8] */
+		config->vmid_mask0 = val1 & 0xFFFF;
+		break;
+	case 0x3:
+		/* COMP2, bits[23:16] */
+		config->vmid_mask0 = val1 & 0xFFFFFF;
+		break;
+	case 0x4:
+		/* COMP3, bits[31:24] */
+		config->vmid_mask0 = val1;
+		break;
+	case 0x5:
+		/* COMP4, bits[7:0] */
+		config->vmid_mask0 = val1;
+		config->vmid_mask1 = val2 & 0xFF;
+		break;
+	case 0x6:
+		/* COMP5, bits[15:8] */
+		config->vmid_mask0 = val1;
+		config->vmid_mask1 = val2 & 0xFFFF;
+		break;
+	case 0x7:
+		/* COMP6, bits[23:16] */
+		config->vmid_mask0 = val1;
+		config->vmid_mask1 = val2 & 0xFFFFFF;
+		break;
+	case 0x8:
+		/* COMP7, bits[31:24] */
+		config->vmid_mask0 = val1;
+		config->vmid_mask1 = val2;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * If software sets a mask bit to 1, it must program relevant byte
+	 * of vmid comparator value 0x0, otherwise behavior is unpredictable.
+	 * For example, if bit[3] of vmid_mask0 is 1, we must clear bits[31:24]
+	 * of vmid comparator0 value (corresponding to byte 0) register.
+	 */
+	mask = config->vmid_mask0;
+	for (i = 0; i < drvdata->numvmidc; i++) {
+		/* mask value of corresponding vmid comparator */
+		maskbyte = mask & ETMv4_EVENT_MASK;
+		/*
+		 * each bit corresponds to a byte of respective vmid comparator
+		 * value register
+		 */
+		for (j = 0; j < 8; j++) {
+			if (maskbyte & 1)
+				config->vmid_val[i] &= ~(0xFFUL << (j * 8));
+			maskbyte >>= 1;
+		}
+		/* Select the next vmid comparator mask value */
+		if (i == 3)
+			/* vmid comparators[4-7] */
+			mask = config->vmid_mask1;
+		else
+			mask >>= 0x8;
+	}
+	spin_unlock(&drvdata->spinlock);
+	return size;
+}
+static DEVICE_ATTR_RW(vmid_masks);
+
+static ssize_t cpu_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	int val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->cpu;
+	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+
+}
+static DEVICE_ATTR_RO(cpu);
+
+static ssize_t ts_source_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	int val;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	if (!drvdata->trfcr) {
+		val = -1;
+		goto out;
+	}
+
+	switch (drvdata->trfcr & TRFCR_ELx_TS_MASK) {
+	case TRFCR_ELx_TS_VIRTUAL:
+	case TRFCR_ELx_TS_GUEST_PHYSICAL:
+	case TRFCR_ELx_TS_PHYSICAL:
+		val = FIELD_GET(TRFCR_ELx_TS_MASK, drvdata->trfcr);
+		break;
+	default:
+		val = -1;
+		break;
+	}
+
+out:
+	return sysfs_emit(buf, "%d\n", val);
+}
+static DEVICE_ATTR_RO(ts_source);
+
+static struct attribute *coresight_etmv4_attrs[] = {
+	&dev_attr_nr_pe_cmp.attr,
+	&dev_attr_nr_addr_cmp.attr,
+	&dev_attr_nr_cntr.attr,
+	&dev_attr_nr_ext_inp.attr,
+	&dev_attr_numcidc.attr,
+	&dev_attr_numvmidc.attr,
+	&dev_attr_nrseqstate.attr,
+	&dev_attr_nr_resource.attr,
+	&dev_attr_nr_ss_cmp.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_pe.attr,
+	&dev_attr_event.attr,
+	&dev_attr_event_instren.attr,
+	&dev_attr_event_ts.attr,
+	&dev_attr_syncfreq.attr,
+	&dev_attr_cyc_threshold.attr,
+	&dev_attr_bb_ctrl.attr,
+	&dev_attr_event_vinst.attr,
+	&dev_attr_s_exlevel_vinst.attr,
+	&dev_attr_ns_exlevel_vinst.attr,
+	&dev_attr_addr_idx.attr,
+	&dev_attr_addr_instdatatype.attr,
+	&dev_attr_addr_single.attr,
+	&dev_attr_addr_range.attr,
+	&dev_attr_addr_start.attr,
+	&dev_attr_addr_stop.attr,
+	&dev_attr_addr_ctxtype.attr,
+	&dev_attr_addr_context.attr,
+	&dev_attr_addr_exlevel_s_ns.attr,
+	&dev_attr_addr_cmp_view.attr,
+	&dev_attr_vinst_pe_cmp_start_stop.attr,
+	&dev_attr_sshot_idx.attr,
+	&dev_attr_sshot_ctrl.attr,
+	&dev_attr_sshot_pe_ctrl.attr,
+	&dev_attr_sshot_status.attr,
+	&dev_attr_seq_idx.attr,
+	&dev_attr_seq_state.attr,
+	&dev_attr_seq_event.attr,
+	&dev_attr_seq_reset_event.attr,
+	&dev_attr_cntr_idx.attr,
+	&dev_attr_cntrldvr.attr,
+	&dev_attr_cntr_val.attr,
+	&dev_attr_cntr_ctrl.attr,
+	&dev_attr_res_idx.attr,
+	&dev_attr_res_ctrl.attr,
+	&dev_attr_ctxid_idx.attr,
+	&dev_attr_ctxid_pid.attr,
+	&dev_attr_ctxid_masks.attr,
+	&dev_attr_vmid_idx.attr,
+	&dev_attr_vmid_val.attr,
+	&dev_attr_vmid_masks.attr,
+	&dev_attr_cpu.attr,
+	&dev_attr_ts_source.attr,
+	NULL,
+};
+
+/*
+ * Trace ID allocated dynamically on enable - but also allocate on read
+ * in case sysfs or perf read before enable to ensure consistent metadata
+ * information for trace decode
+ */
+static ssize_t trctraceid_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	int trace_id;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	trace_id = etm4_read_alloc_trace_id(drvdata);
+	if (trace_id < 0)
+		return trace_id;
+
+	return sysfs_emit(buf, "0x%x\n", trace_id);
+}
+
+struct etmv4_reg {
+	struct coresight_device *csdev;
+	u32 offset;
+	u32 data;
+};
+
+static void do_smp_cross_read(void *data)
+{
+	struct etmv4_reg *reg = data;
+
+	reg->data = etm4x_relaxed_read32(&reg->csdev->access, reg->offset);
+}
+
+static u32 etmv4_cross_read(const struct etmv4_drvdata *drvdata, u32 offset)
+{
+	struct etmv4_reg reg;
+
+	reg.offset = offset;
+	reg.csdev = drvdata->csdev;
+
+	/*
+	 * smp cross call ensures the CPU will be powered up before
+	 * accessing the ETMv4 trace core registers
+	 */
+	smp_call_function_single(drvdata->cpu, do_smp_cross_read, &reg, 1);
+	return reg.data;
+}
+
+static inline u32 coresight_etm4x_attr_to_offset(struct device_attribute *attr)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return (u32)(unsigned long)eattr->var;
+}
+
+static ssize_t coresight_etm4x_reg_show(struct device *dev,
+					struct device_attribute *d_attr,
+					char *buf)
+{
+	u32 val, offset;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	offset = coresight_etm4x_attr_to_offset(d_attr);
+
+	pm_runtime_get_sync(dev->parent);
+	val = etmv4_cross_read(drvdata, offset);
+	pm_runtime_put_sync(dev->parent);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%x\n", val);
+}
+
+static inline bool
+etm4x_register_implemented(struct etmv4_drvdata *drvdata, u32 offset)
+{
+	switch (offset) {
+	ETM_COMMON_SYSREG_LIST_CASES
+		/*
+		 * Common registers to ETE & ETM4x accessible via system
+		 * instructions are always implemented.
+		 */
+		return true;
+
+	ETM4x_ONLY_SYSREG_LIST_CASES
+		/*
+		 * We only support etm4x and ete. So if the device is not
+		 * ETE, it must be ETMv4x.
+		 */
+		return !etm4x_is_ete(drvdata);
+
+	ETM4x_MMAP_LIST_CASES
+		/*
+		 * Registers accessible only via memory-mapped registers
+		 * must not be accessed via system instructions.
+		 * We cannot access the drvdata->csdev here, as this
+		 * function is called during the device creation, via
+		 * coresight_register() and the csdev is not initialized
+		 * until that is done. So rely on the drvdata->base to
+		 * detect if we have a memory mapped access.
+		 * Also ETE doesn't implement memory mapped access, thus
+		 * it is sufficient to check that we are using mmio.
+		 */
+		return !!drvdata->base;
+
+	ETE_ONLY_SYSREG_LIST_CASES
+		return etm4x_is_ete(drvdata);
+	}
+
+	return false;
+}
+
+/*
+ * Hide the ETM4x registers that may not be available on the
+ * hardware.
+ * There are certain management registers unavailable via system
+ * instructions. Make those sysfs attributes hidden on such
+ * systems.
+ */
+static umode_t
+coresight_etm4x_attr_reg_implemented(struct kobject *kobj,
+				     struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct device_attribute *d_attr;
+	u32 offset;
+
+	d_attr = container_of(attr, struct device_attribute, attr);
+	offset = coresight_etm4x_attr_to_offset(d_attr);
+
+	if (etm4x_register_implemented(drvdata, offset))
+		return attr->mode;
+	return 0;
+}
+
+/*
+ * Macro to set an RO ext attribute with offset and show function.
+ * Offset is used in mgmt group to ensure only correct registers for
+ * the ETM / ETE variant are visible.
+ */
+#define coresight_etm4x_reg_showfn(name, offset, showfn) (	\
+	&((struct dev_ext_attribute[]) {			\
+	   {							\
+		__ATTR(name, 0444, showfn, NULL),		\
+		(void *)(unsigned long)offset			\
+	   }							\
+	})[0].attr.attr						\
+	)
+
+/* macro using the default coresight_etm4x_reg_show function */
+#define coresight_etm4x_reg(name, offset)	\
+	coresight_etm4x_reg_showfn(name, offset, coresight_etm4x_reg_show)
+
+static struct attribute *coresight_etmv4_mgmt_attrs[] = {
+	coresight_etm4x_reg(trcpdcr, TRCPDCR),
+	coresight_etm4x_reg(trcpdsr, TRCPDSR),
+	coresight_etm4x_reg(trclsr, TRCLSR),
+	coresight_etm4x_reg(trcauthstatus, TRCAUTHSTATUS),
+	coresight_etm4x_reg(trcdevid, TRCDEVID),
+	coresight_etm4x_reg(trcdevtype, TRCDEVTYPE),
+	coresight_etm4x_reg(trcpidr0, TRCPIDR0),
+	coresight_etm4x_reg(trcpidr1, TRCPIDR1),
+	coresight_etm4x_reg(trcpidr2, TRCPIDR2),
+	coresight_etm4x_reg(trcpidr3, TRCPIDR3),
+	coresight_etm4x_reg(trcoslsr, TRCOSLSR),
+	coresight_etm4x_reg(trcconfig, TRCCONFIGR),
+	coresight_etm4x_reg_showfn(trctraceid, TRCTRACEIDR, trctraceid_show),
+	coresight_etm4x_reg(trcdevarch, TRCDEVARCH),
+	NULL,
+};
+
+static struct attribute *coresight_etmv4_trcidr_attrs[] = {
+	coresight_etm4x_reg(trcidr0, TRCIDR0),
+	coresight_etm4x_reg(trcidr1, TRCIDR1),
+	coresight_etm4x_reg(trcidr2, TRCIDR2),
+	coresight_etm4x_reg(trcidr3, TRCIDR3),
+	coresight_etm4x_reg(trcidr4, TRCIDR4),
+	coresight_etm4x_reg(trcidr5, TRCIDR5),
+	/* trcidr[6,7] are reserved */
+	coresight_etm4x_reg(trcidr8, TRCIDR8),
+	coresight_etm4x_reg(trcidr9, TRCIDR9),
+	coresight_etm4x_reg(trcidr10, TRCIDR10),
+	coresight_etm4x_reg(trcidr11, TRCIDR11),
+	coresight_etm4x_reg(trcidr12, TRCIDR12),
+	coresight_etm4x_reg(trcidr13, TRCIDR13),
+	NULL,
+};
+
+static const struct attribute_group coresight_etmv4_group = {
+	.attrs = coresight_etmv4_attrs,
+};
+
+static const struct attribute_group coresight_etmv4_mgmt_group = {
+	.is_visible = coresight_etm4x_attr_reg_implemented,
+	.attrs = coresight_etmv4_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group coresight_etmv4_trcidr_group = {
+	.attrs = coresight_etmv4_trcidr_attrs,
+	.name = "trcidr",
+};
+
+const struct attribute_group *coresight_etmv4_groups[] = {
+	&coresight_etmv4_group,
+	&coresight_etmv4_mgmt_group,
+	&coresight_etmv4_trcidr_group,
+	NULL,
+};
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
new file mode 100644
index 0000000000..da17b6c49b
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -0,0 +1,1097 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_ETM_H
+#define _CORESIGHT_CORESIGHT_ETM_H
+
+#include <asm/local.h>
+#include <linux/const.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "coresight-priv.h"
+
+/*
+ * Device registers:
+ * 0x000 - 0x2FC: Trace		registers
+ * 0x300 - 0x314: Management	registers
+ * 0x318 - 0xEFC: Trace		registers
+ * 0xF00: Management		registers
+ * 0xFA0 - 0xFA4: Trace		registers
+ * 0xFA8 - 0xFFC: Management	registers
+ */
+/* Trace registers (0x000-0x2FC) */
+/* Main control and configuration registers */
+#define TRCPRGCTLR			0x004
+#define TRCPROCSELR			0x008
+#define TRCSTATR			0x00C
+#define TRCCONFIGR			0x010
+#define TRCAUXCTLR			0x018
+#define TRCEVENTCTL0R			0x020
+#define TRCEVENTCTL1R			0x024
+#define TRCRSR				0x028
+#define TRCSTALLCTLR			0x02C
+#define TRCTSCTLR			0x030
+#define TRCSYNCPR			0x034
+#define TRCCCCTLR			0x038
+#define TRCBBCTLR			0x03C
+#define TRCTRACEIDR			0x040
+#define TRCQCTLR			0x044
+/* Filtering control registers */
+#define TRCVICTLR			0x080
+#define TRCVIIECTLR			0x084
+#define TRCVISSCTLR			0x088
+#define TRCVIPCSSCTLR			0x08C
+#define TRCVDCTLR			0x0A0
+#define TRCVDSACCTLR			0x0A4
+#define TRCVDARCCTLR			0x0A8
+/* Derived resources registers */
+#define TRCSEQEVRn(n)			(0x100 + (n * 4)) /* n = 0-2 */
+#define TRCSEQRSTEVR			0x118
+#define TRCSEQSTR			0x11C
+#define TRCEXTINSELR			0x120
+#define TRCEXTINSELRn(n)		(0x120 + (n * 4)) /* n = 0-3 */
+#define TRCCNTRLDVRn(n)			(0x140 + (n * 4)) /* n = 0-3 */
+#define TRCCNTCTLRn(n)			(0x150 + (n * 4)) /* n = 0-3 */
+#define TRCCNTVRn(n)			(0x160 + (n * 4)) /* n = 0-3 */
+/* ID registers */
+#define TRCIDR8				0x180
+#define TRCIDR9				0x184
+#define TRCIDR10			0x188
+#define TRCIDR11			0x18C
+#define TRCIDR12			0x190
+#define TRCIDR13			0x194
+#define TRCIMSPEC0			0x1C0
+#define TRCIMSPECn(n)			(0x1C0 + (n * 4)) /* n = 1-7 */
+#define TRCIDR0				0x1E0
+#define TRCIDR1				0x1E4
+#define TRCIDR2				0x1E8
+#define TRCIDR3				0x1EC
+#define TRCIDR4				0x1F0
+#define TRCIDR5				0x1F4
+#define TRCIDR6				0x1F8
+#define TRCIDR7				0x1FC
+/*
+ * Resource selection registers, n = 2-31.
+ * First pair (regs 0, 1) is always present and is reserved.
+ */
+#define TRCRSCTLRn(n)			(0x200 + (n * 4))
+/* Single-shot comparator registers, n = 0-7 */
+#define TRCSSCCRn(n)			(0x280 + (n * 4))
+#define TRCSSCSRn(n)			(0x2A0 + (n * 4))
+#define TRCSSPCICRn(n)			(0x2C0 + (n * 4))
+/* Management registers (0x300-0x314) */
+#define TRCOSLAR			0x300
+#define TRCOSLSR			0x304
+#define TRCPDCR				0x310
+#define TRCPDSR				0x314
+/* Trace registers (0x318-0xEFC) */
+/* Address Comparator registers n = 0-15 */
+#define TRCACVRn(n)			(0x400 + (n * 8))
+#define TRCACATRn(n)			(0x480 + (n * 8))
+/* Data Value Comparator Value registers, n = 0-7 */
+#define TRCDVCVRn(n)			(0x500 + (n * 16))
+#define TRCDVCMRn(n)			(0x580 + (n * 16))
+/* ContextID/Virtual ContextID comparators, n = 0-7 */
+#define TRCCIDCVRn(n)			(0x600 + (n * 8))
+#define TRCVMIDCVRn(n)			(0x640 + (n * 8))
+#define TRCCIDCCTLR0			0x680
+#define TRCCIDCCTLR1			0x684
+#define TRCVMIDCCTLR0			0x688
+#define TRCVMIDCCTLR1			0x68C
+/* Management register (0xF00) */
+/* Integration control registers */
+#define TRCITCTRL			0xF00
+/* Trace registers (0xFA0-0xFA4) */
+/* Claim tag registers */
+#define TRCCLAIMSET			0xFA0
+#define TRCCLAIMCLR			0xFA4
+/* Management registers (0xFA8-0xFFC) */
+#define TRCDEVAFF0			0xFA8
+#define TRCDEVAFF1			0xFAC
+#define TRCLAR				0xFB0
+#define TRCLSR				0xFB4
+#define TRCAUTHSTATUS			0xFB8
+#define TRCDEVARCH			0xFBC
+#define TRCDEVID			0xFC8
+#define TRCDEVTYPE			0xFCC
+#define TRCPIDR4			0xFD0
+#define TRCPIDR5			0xFD4
+#define TRCPIDR6			0xFD8
+#define TRCPIDR7			0xFDC
+#define TRCPIDR0			0xFE0
+#define TRCPIDR1			0xFE4
+#define TRCPIDR2			0xFE8
+#define TRCPIDR3			0xFEC
+#define TRCCIDR0			0xFF0
+#define TRCCIDR1			0xFF4
+#define TRCCIDR2			0xFF8
+#define TRCCIDR3			0xFFC
+
+#define TRCRSR_TA			BIT(12)
+
+/*
+ * Bit positions of registers that are defined above, in the sysreg.h style
+ * of _MASK for multi bit fields and BIT() for single bits.
+ */
+#define TRCIDR0_INSTP0_MASK			GENMASK(2, 1)
+#define TRCIDR0_TRCBB				BIT(5)
+#define TRCIDR0_TRCCOND				BIT(6)
+#define TRCIDR0_TRCCCI				BIT(7)
+#define TRCIDR0_RETSTACK			BIT(9)
+#define TRCIDR0_NUMEVENT_MASK			GENMASK(11, 10)
+#define TRCIDR0_QSUPP_MASK			GENMASK(16, 15)
+#define TRCIDR0_TSSIZE_MASK			GENMASK(28, 24)
+
+#define TRCIDR2_CIDSIZE_MASK			GENMASK(9, 5)
+#define TRCIDR2_VMIDSIZE_MASK			GENMASK(14, 10)
+#define TRCIDR2_CCSIZE_MASK			GENMASK(28, 25)
+
+#define TRCIDR3_CCITMIN_MASK			GENMASK(11, 0)
+#define TRCIDR3_EXLEVEL_S_MASK			GENMASK(19, 16)
+#define TRCIDR3_EXLEVEL_NS_MASK			GENMASK(23, 20)
+#define TRCIDR3_TRCERR				BIT(24)
+#define TRCIDR3_SYNCPR				BIT(25)
+#define TRCIDR3_STALLCTL			BIT(26)
+#define TRCIDR3_SYSSTALL			BIT(27)
+#define TRCIDR3_NUMPROC_LO_MASK			GENMASK(30, 28)
+#define TRCIDR3_NUMPROC_HI_MASK			GENMASK(13, 12)
+#define TRCIDR3_NOOVERFLOW			BIT(31)
+
+#define TRCIDR4_NUMACPAIRS_MASK			GENMASK(3, 0)
+#define TRCIDR4_NUMPC_MASK			GENMASK(15, 12)
+#define TRCIDR4_NUMRSPAIR_MASK			GENMASK(19, 16)
+#define TRCIDR4_NUMSSCC_MASK			GENMASK(23, 20)
+#define TRCIDR4_NUMCIDC_MASK			GENMASK(27, 24)
+#define TRCIDR4_NUMVMIDC_MASK			GENMASK(31, 28)
+
+#define TRCIDR5_NUMEXTIN_MASK			GENMASK(8, 0)
+#define TRCIDR5_TRACEIDSIZE_MASK		GENMASK(21, 16)
+#define TRCIDR5_ATBTRIG				BIT(22)
+#define TRCIDR5_LPOVERRIDE			BIT(23)
+#define TRCIDR5_NUMSEQSTATE_MASK		GENMASK(27, 25)
+#define TRCIDR5_NUMCNTR_MASK			GENMASK(30, 28)
+
+#define TRCCONFIGR_INSTP0_LOAD			BIT(1)
+#define TRCCONFIGR_INSTP0_STORE			BIT(2)
+#define TRCCONFIGR_INSTP0_LOAD_STORE		(TRCCONFIGR_INSTP0_LOAD | TRCCONFIGR_INSTP0_STORE)
+#define TRCCONFIGR_BB				BIT(3)
+#define TRCCONFIGR_CCI				BIT(4)
+#define TRCCONFIGR_CID				BIT(6)
+#define TRCCONFIGR_VMID				BIT(7)
+#define TRCCONFIGR_COND_MASK			GENMASK(10, 8)
+#define TRCCONFIGR_TS				BIT(11)
+#define TRCCONFIGR_RS				BIT(12)
+#define TRCCONFIGR_QE_W_COUNTS			BIT(13)
+#define TRCCONFIGR_QE_WO_COUNTS			BIT(14)
+#define TRCCONFIGR_VMIDOPT			BIT(15)
+#define TRCCONFIGR_DA				BIT(16)
+#define TRCCONFIGR_DV				BIT(17)
+
+#define TRCEVENTCTL1R_INSTEN_MASK		GENMASK(3, 0)
+#define TRCEVENTCTL1R_INSTEN_0			BIT(0)
+#define TRCEVENTCTL1R_INSTEN_1			BIT(1)
+#define TRCEVENTCTL1R_INSTEN_2			BIT(2)
+#define TRCEVENTCTL1R_INSTEN_3			BIT(3)
+#define TRCEVENTCTL1R_ATB			BIT(11)
+#define TRCEVENTCTL1R_LPOVERRIDE		BIT(12)
+
+#define TRCSTALLCTLR_ISTALL			BIT(8)
+#define TRCSTALLCTLR_INSTPRIORITY		BIT(10)
+#define TRCSTALLCTLR_NOOVERFLOW			BIT(13)
+
+#define TRCVICTLR_EVENT_MASK			GENMASK(7, 0)
+#define TRCVICTLR_SSSTATUS			BIT(9)
+#define TRCVICTLR_TRCRESET			BIT(10)
+#define TRCVICTLR_TRCERR			BIT(11)
+#define TRCVICTLR_EXLEVEL_MASK			GENMASK(22, 16)
+#define TRCVICTLR_EXLEVEL_S_MASK		GENMASK(19, 16)
+#define TRCVICTLR_EXLEVEL_NS_MASK		GENMASK(22, 20)
+
+#define TRCACATRn_TYPE_MASK			GENMASK(1, 0)
+#define TRCACATRn_CONTEXTTYPE_MASK		GENMASK(3, 2)
+#define TRCACATRn_CONTEXTTYPE_CTXID		BIT(2)
+#define TRCACATRn_CONTEXTTYPE_VMID		BIT(3)
+#define TRCACATRn_CONTEXT_MASK			GENMASK(6, 4)
+#define TRCACATRn_EXLEVEL_MASK			GENMASK(14, 8)
+
+#define TRCSSCSRn_STATUS			BIT(31)
+#define TRCSSCCRn_SAC_ARC_RST_MASK		GENMASK(24, 0)
+
+#define TRCSSPCICRn_PC_MASK			GENMASK(7, 0)
+
+#define TRCBBCTLR_MODE				BIT(8)
+#define TRCBBCTLR_RANGE_MASK			GENMASK(7, 0)
+
+#define TRCRSCTLRn_PAIRINV			BIT(21)
+#define TRCRSCTLRn_INV				BIT(20)
+#define TRCRSCTLRn_GROUP_MASK			GENMASK(19, 16)
+#define TRCRSCTLRn_SELECT_MASK			GENMASK(15, 0)
+
+/*
+ * System instructions to access ETM registers.
+ * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions
+ */
+#define ETM4x_OFFSET_TO_REG(x)		((x) >> 2)
+
+#define ETM4x_CRn(n)			(((n) >> 7) & 0x7)
+#define ETM4x_Op2(n)			(((n) >> 4) & 0x7)
+#define ETM4x_CRm(n)			((n) & 0xf)
+
+#include <asm/sysreg.h>
+#define ETM4x_REG_NUM_TO_SYSREG(n)				\
+	sys_reg(2, 1, ETM4x_CRn(n), ETM4x_CRm(n), ETM4x_Op2(n))
+
+#define READ_ETM4x_REG(reg)					\
+	read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
+#define WRITE_ETM4x_REG(val, reg)				\
+	write_sysreg_s(val, ETM4x_REG_NUM_TO_SYSREG((reg)))
+
+#define read_etm4x_sysreg_const_offset(offset)			\
+	READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
+
+#define write_etm4x_sysreg_const_offset(val, offset)		\
+	WRITE_ETM4x_REG(val, ETM4x_OFFSET_TO_REG(offset))
+
+#define CASE_READ(res, x)					\
+	case (x): { (res) = read_etm4x_sysreg_const_offset((x)); break; }
+
+#define CASE_WRITE(val, x)					\
+	case (x): { write_etm4x_sysreg_const_offset((val), (x)); break; }
+
+#define CASE_NOP(__unused, x)					\
+	case (x):	/* fall through */
+
+#define ETE_ONLY_SYSREG_LIST(op, val)		\
+	CASE_##op((val), TRCRSR)		\
+	CASE_##op((val), TRCEXTINSELRn(1))	\
+	CASE_##op((val), TRCEXTINSELRn(2))	\
+	CASE_##op((val), TRCEXTINSELRn(3))
+
+/* List of registers accessible via System instructions */
+#define ETM4x_ONLY_SYSREG_LIST(op, val)		\
+	CASE_##op((val), TRCPROCSELR)		\
+	CASE_##op((val), TRCVDCTLR)		\
+	CASE_##op((val), TRCVDSACCTLR)		\
+	CASE_##op((val), TRCVDARCCTLR)		\
+	CASE_##op((val), TRCOSLAR)
+
+#define ETM_COMMON_SYSREG_LIST(op, val)		\
+	CASE_##op((val), TRCPRGCTLR)		\
+	CASE_##op((val), TRCSTATR)		\
+	CASE_##op((val), TRCCONFIGR)		\
+	CASE_##op((val), TRCAUXCTLR)		\
+	CASE_##op((val), TRCEVENTCTL0R)		\
+	CASE_##op((val), TRCEVENTCTL1R)		\
+	CASE_##op((val), TRCSTALLCTLR)		\
+	CASE_##op((val), TRCTSCTLR)		\
+	CASE_##op((val), TRCSYNCPR)		\
+	CASE_##op((val), TRCCCCTLR)		\
+	CASE_##op((val), TRCBBCTLR)		\
+	CASE_##op((val), TRCTRACEIDR)		\
+	CASE_##op((val), TRCQCTLR)		\
+	CASE_##op((val), TRCVICTLR)		\
+	CASE_##op((val), TRCVIIECTLR)		\
+	CASE_##op((val), TRCVISSCTLR)		\
+	CASE_##op((val), TRCVIPCSSCTLR)		\
+	CASE_##op((val), TRCSEQEVRn(0))		\
+	CASE_##op((val), TRCSEQEVRn(1))		\
+	CASE_##op((val), TRCSEQEVRn(2))		\
+	CASE_##op((val), TRCSEQRSTEVR)		\
+	CASE_##op((val), TRCSEQSTR)		\
+	CASE_##op((val), TRCEXTINSELR)		\
+	CASE_##op((val), TRCCNTRLDVRn(0))	\
+	CASE_##op((val), TRCCNTRLDVRn(1))	\
+	CASE_##op((val), TRCCNTRLDVRn(2))	\
+	CASE_##op((val), TRCCNTRLDVRn(3))	\
+	CASE_##op((val), TRCCNTCTLRn(0))	\
+	CASE_##op((val), TRCCNTCTLRn(1))	\
+	CASE_##op((val), TRCCNTCTLRn(2))	\
+	CASE_##op((val), TRCCNTCTLRn(3))	\
+	CASE_##op((val), TRCCNTVRn(0))		\
+	CASE_##op((val), TRCCNTVRn(1))		\
+	CASE_##op((val), TRCCNTVRn(2))		\
+	CASE_##op((val), TRCCNTVRn(3))		\
+	CASE_##op((val), TRCIDR8)		\
+	CASE_##op((val), TRCIDR9)		\
+	CASE_##op((val), TRCIDR10)		\
+	CASE_##op((val), TRCIDR11)		\
+	CASE_##op((val), TRCIDR12)		\
+	CASE_##op((val), TRCIDR13)		\
+	CASE_##op((val), TRCIMSPECn(0))		\
+	CASE_##op((val), TRCIMSPECn(1))		\
+	CASE_##op((val), TRCIMSPECn(2))		\
+	CASE_##op((val), TRCIMSPECn(3))		\
+	CASE_##op((val), TRCIMSPECn(4))		\
+	CASE_##op((val), TRCIMSPECn(5))		\
+	CASE_##op((val), TRCIMSPECn(6))		\
+	CASE_##op((val), TRCIMSPECn(7))		\
+	CASE_##op((val), TRCIDR0)		\
+	CASE_##op((val), TRCIDR1)		\
+	CASE_##op((val), TRCIDR2)		\
+	CASE_##op((val), TRCIDR3)		\
+	CASE_##op((val), TRCIDR4)		\
+	CASE_##op((val), TRCIDR5)		\
+	CASE_##op((val), TRCIDR6)		\
+	CASE_##op((val), TRCIDR7)		\
+	CASE_##op((val), TRCRSCTLRn(2))		\
+	CASE_##op((val), TRCRSCTLRn(3))		\
+	CASE_##op((val), TRCRSCTLRn(4))		\
+	CASE_##op((val), TRCRSCTLRn(5))		\
+	CASE_##op((val), TRCRSCTLRn(6))		\
+	CASE_##op((val), TRCRSCTLRn(7))		\
+	CASE_##op((val), TRCRSCTLRn(8))		\
+	CASE_##op((val), TRCRSCTLRn(9))		\
+	CASE_##op((val), TRCRSCTLRn(10))	\
+	CASE_##op((val), TRCRSCTLRn(11))	\
+	CASE_##op((val), TRCRSCTLRn(12))	\
+	CASE_##op((val), TRCRSCTLRn(13))	\
+	CASE_##op((val), TRCRSCTLRn(14))	\
+	CASE_##op((val), TRCRSCTLRn(15))	\
+	CASE_##op((val), TRCRSCTLRn(16))	\
+	CASE_##op((val), TRCRSCTLRn(17))	\
+	CASE_##op((val), TRCRSCTLRn(18))	\
+	CASE_##op((val), TRCRSCTLRn(19))	\
+	CASE_##op((val), TRCRSCTLRn(20))	\
+	CASE_##op((val), TRCRSCTLRn(21))	\
+	CASE_##op((val), TRCRSCTLRn(22))	\
+	CASE_##op((val), TRCRSCTLRn(23))	\
+	CASE_##op((val), TRCRSCTLRn(24))	\
+	CASE_##op((val), TRCRSCTLRn(25))	\
+	CASE_##op((val), TRCRSCTLRn(26))	\
+	CASE_##op((val), TRCRSCTLRn(27))	\
+	CASE_##op((val), TRCRSCTLRn(28))	\
+	CASE_##op((val), TRCRSCTLRn(29))	\
+	CASE_##op((val), TRCRSCTLRn(30))	\
+	CASE_##op((val), TRCRSCTLRn(31))	\
+	CASE_##op((val), TRCSSCCRn(0))		\
+	CASE_##op((val), TRCSSCCRn(1))		\
+	CASE_##op((val), TRCSSCCRn(2))		\
+	CASE_##op((val), TRCSSCCRn(3))		\
+	CASE_##op((val), TRCSSCCRn(4))		\
+	CASE_##op((val), TRCSSCCRn(5))		\
+	CASE_##op((val), TRCSSCCRn(6))		\
+	CASE_##op((val), TRCSSCCRn(7))		\
+	CASE_##op((val), TRCSSCSRn(0))		\
+	CASE_##op((val), TRCSSCSRn(1))		\
+	CASE_##op((val), TRCSSCSRn(2))		\
+	CASE_##op((val), TRCSSCSRn(3))		\
+	CASE_##op((val), TRCSSCSRn(4))		\
+	CASE_##op((val), TRCSSCSRn(5))		\
+	CASE_##op((val), TRCSSCSRn(6))		\
+	CASE_##op((val), TRCSSCSRn(7))		\
+	CASE_##op((val), TRCSSPCICRn(0))	\
+	CASE_##op((val), TRCSSPCICRn(1))	\
+	CASE_##op((val), TRCSSPCICRn(2))	\
+	CASE_##op((val), TRCSSPCICRn(3))	\
+	CASE_##op((val), TRCSSPCICRn(4))	\
+	CASE_##op((val), TRCSSPCICRn(5))	\
+	CASE_##op((val), TRCSSPCICRn(6))	\
+	CASE_##op((val), TRCSSPCICRn(7))	\
+	CASE_##op((val), TRCOSLSR)		\
+	CASE_##op((val), TRCACVRn(0))		\
+	CASE_##op((val), TRCACVRn(1))		\
+	CASE_##op((val), TRCACVRn(2))		\
+	CASE_##op((val), TRCACVRn(3))		\
+	CASE_##op((val), TRCACVRn(4))		\
+	CASE_##op((val), TRCACVRn(5))		\
+	CASE_##op((val), TRCACVRn(6))		\
+	CASE_##op((val), TRCACVRn(7))		\
+	CASE_##op((val), TRCACVRn(8))		\
+	CASE_##op((val), TRCACVRn(9))		\
+	CASE_##op((val), TRCACVRn(10))		\
+	CASE_##op((val), TRCACVRn(11))		\
+	CASE_##op((val), TRCACVRn(12))		\
+	CASE_##op((val), TRCACVRn(13))		\
+	CASE_##op((val), TRCACVRn(14))		\
+	CASE_##op((val), TRCACVRn(15))		\
+	CASE_##op((val), TRCACATRn(0))		\
+	CASE_##op((val), TRCACATRn(1))		\
+	CASE_##op((val), TRCACATRn(2))		\
+	CASE_##op((val), TRCACATRn(3))		\
+	CASE_##op((val), TRCACATRn(4))		\
+	CASE_##op((val), TRCACATRn(5))		\
+	CASE_##op((val), TRCACATRn(6))		\
+	CASE_##op((val), TRCACATRn(7))		\
+	CASE_##op((val), TRCACATRn(8))		\
+	CASE_##op((val), TRCACATRn(9))		\
+	CASE_##op((val), TRCACATRn(10))		\
+	CASE_##op((val), TRCACATRn(11))		\
+	CASE_##op((val), TRCACATRn(12))		\
+	CASE_##op((val), TRCACATRn(13))		\
+	CASE_##op((val), TRCACATRn(14))		\
+	CASE_##op((val), TRCACATRn(15))		\
+	CASE_##op((val), TRCDVCVRn(0))		\
+	CASE_##op((val), TRCDVCVRn(1))		\
+	CASE_##op((val), TRCDVCVRn(2))		\
+	CASE_##op((val), TRCDVCVRn(3))		\
+	CASE_##op((val), TRCDVCVRn(4))		\
+	CASE_##op((val), TRCDVCVRn(5))		\
+	CASE_##op((val), TRCDVCVRn(6))		\
+	CASE_##op((val), TRCDVCVRn(7))		\
+	CASE_##op((val), TRCDVCMRn(0))		\
+	CASE_##op((val), TRCDVCMRn(1))		\
+	CASE_##op((val), TRCDVCMRn(2))		\
+	CASE_##op((val), TRCDVCMRn(3))		\
+	CASE_##op((val), TRCDVCMRn(4))		\
+	CASE_##op((val), TRCDVCMRn(5))		\
+	CASE_##op((val), TRCDVCMRn(6))		\
+	CASE_##op((val), TRCDVCMRn(7))		\
+	CASE_##op((val), TRCCIDCVRn(0))		\
+	CASE_##op((val), TRCCIDCVRn(1))		\
+	CASE_##op((val), TRCCIDCVRn(2))		\
+	CASE_##op((val), TRCCIDCVRn(3))		\
+	CASE_##op((val), TRCCIDCVRn(4))		\
+	CASE_##op((val), TRCCIDCVRn(5))		\
+	CASE_##op((val), TRCCIDCVRn(6))		\
+	CASE_##op((val), TRCCIDCVRn(7))		\
+	CASE_##op((val), TRCVMIDCVRn(0))	\
+	CASE_##op((val), TRCVMIDCVRn(1))	\
+	CASE_##op((val), TRCVMIDCVRn(2))	\
+	CASE_##op((val), TRCVMIDCVRn(3))	\
+	CASE_##op((val), TRCVMIDCVRn(4))	\
+	CASE_##op((val), TRCVMIDCVRn(5))	\
+	CASE_##op((val), TRCVMIDCVRn(6))	\
+	CASE_##op((val), TRCVMIDCVRn(7))	\
+	CASE_##op((val), TRCCIDCCTLR0)		\
+	CASE_##op((val), TRCCIDCCTLR1)		\
+	CASE_##op((val), TRCVMIDCCTLR0)		\
+	CASE_##op((val), TRCVMIDCCTLR1)		\
+	CASE_##op((val), TRCCLAIMSET)		\
+	CASE_##op((val), TRCCLAIMCLR)		\
+	CASE_##op((val), TRCAUTHSTATUS)		\
+	CASE_##op((val), TRCDEVARCH)		\
+	CASE_##op((val), TRCDEVID)
+
+/* List of registers only accessible via memory-mapped interface */
+#define ETM_MMAP_LIST(op, val)			\
+	CASE_##op((val), TRCDEVTYPE)		\
+	CASE_##op((val), TRCPDCR)		\
+	CASE_##op((val), TRCPDSR)		\
+	CASE_##op((val), TRCDEVAFF0)		\
+	CASE_##op((val), TRCDEVAFF1)		\
+	CASE_##op((val), TRCLAR)		\
+	CASE_##op((val), TRCLSR)		\
+	CASE_##op((val), TRCITCTRL)		\
+	CASE_##op((val), TRCPIDR4)		\
+	CASE_##op((val), TRCPIDR0)		\
+	CASE_##op((val), TRCPIDR1)		\
+	CASE_##op((val), TRCPIDR2)		\
+	CASE_##op((val), TRCPIDR3)
+
+#define ETM4x_READ_SYSREG_CASES(res)		\
+	ETM_COMMON_SYSREG_LIST(READ, (res))	\
+	ETM4x_ONLY_SYSREG_LIST(READ, (res))
+
+#define ETM4x_WRITE_SYSREG_CASES(val)		\
+	ETM_COMMON_SYSREG_LIST(WRITE, (val))	\
+	ETM4x_ONLY_SYSREG_LIST(WRITE, (val))
+
+#define ETM_COMMON_SYSREG_LIST_CASES		\
+	ETM_COMMON_SYSREG_LIST(NOP, __unused)
+
+#define ETM4x_ONLY_SYSREG_LIST_CASES		\
+	ETM4x_ONLY_SYSREG_LIST(NOP, __unused)
+
+#define ETM4x_SYSREG_LIST_CASES			\
+	ETM_COMMON_SYSREG_LIST_CASES		\
+	ETM4x_ONLY_SYSREG_LIST(NOP, __unused)
+
+#define ETM4x_MMAP_LIST_CASES		ETM_MMAP_LIST(NOP, __unused)
+
+/* ETE only supports system register access */
+#define ETE_READ_CASES(res)			\
+	ETM_COMMON_SYSREG_LIST(READ, (res))	\
+	ETE_ONLY_SYSREG_LIST(READ, (res))
+
+#define ETE_WRITE_CASES(val)			\
+	ETM_COMMON_SYSREG_LIST(WRITE, (val))	\
+	ETE_ONLY_SYSREG_LIST(WRITE, (val))
+
+#define ETE_ONLY_SYSREG_LIST_CASES		\
+	ETE_ONLY_SYSREG_LIST(NOP, __unused)
+
+#define read_etm4x_sysreg_offset(offset, _64bit)				\
+	({									\
+		u64 __val;							\
+										\
+		if (__is_constexpr((offset)))					\
+			__val = read_etm4x_sysreg_const_offset((offset));	\
+		else								\
+			__val = etm4x_sysreg_read((offset), true, (_64bit));	\
+		__val;								\
+	 })
+
+#define write_etm4x_sysreg_offset(val, offset, _64bit)			\
+	do {								\
+		if (__builtin_constant_p((offset)))			\
+			write_etm4x_sysreg_const_offset((val),		\
+							(offset));	\
+		else							\
+			etm4x_sysreg_write((val), (offset), true,	\
+					   (_64bit));			\
+	} while (0)
+
+
+#define etm4x_relaxed_read32(csa, offset)				\
+	((u32)((csa)->io_mem ?						\
+		 readl_relaxed((csa)->base + (offset)) :		\
+		 read_etm4x_sysreg_offset((offset), false)))
+
+#define etm4x_relaxed_read64(csa, offset)				\
+	((u64)((csa)->io_mem ?						\
+		 readq_relaxed((csa)->base + (offset)) :		\
+		 read_etm4x_sysreg_offset((offset), true)))
+
+#define etm4x_read32(csa, offset)					\
+	({								\
+		u32 __val = etm4x_relaxed_read32((csa), (offset));	\
+		__io_ar(__val);						\
+		__val;							\
+	 })
+
+#define etm4x_read64(csa, offset)					\
+	({								\
+		u64 __val = etm4x_relaxed_read64((csa), (offset));	\
+		__io_ar(__val);						\
+		__val;							\
+	 })
+
+#define etm4x_relaxed_write32(csa, val, offset)				\
+	do {								\
+		if ((csa)->io_mem)					\
+			writel_relaxed((val), (csa)->base + (offset));	\
+		else							\
+			write_etm4x_sysreg_offset((val), (offset),	\
+						  false);		\
+	} while (0)
+
+#define etm4x_relaxed_write64(csa, val, offset)				\
+	do {								\
+		if ((csa)->io_mem)					\
+			writeq_relaxed((val), (csa)->base + (offset));	\
+		else							\
+			write_etm4x_sysreg_offset((val), (offset),	\
+						  true);		\
+	} while (0)
+
+#define etm4x_write32(csa, val, offset)					\
+	do {								\
+		__io_bw();						\
+		etm4x_relaxed_write32((csa), (val), (offset));		\
+	} while (0)
+
+#define etm4x_write64(csa, val, offset)					\
+	do {								\
+		__io_bw();						\
+		etm4x_relaxed_write64((csa), (val), (offset));		\
+	} while (0)
+
+
+/* ETMv4 resources */
+#define ETM_MAX_NR_PE			8
+#define ETMv4_MAX_CNTR			4
+#define ETM_MAX_SEQ_STATES		4
+#define ETM_MAX_EXT_INP_SEL		4
+#define ETM_MAX_EXT_INP			256
+#define ETM_MAX_EXT_OUT			4
+#define ETM_MAX_SINGLE_ADDR_CMP		16
+#define ETM_MAX_ADDR_RANGE_CMP		(ETM_MAX_SINGLE_ADDR_CMP / 2)
+#define ETM_MAX_DATA_VAL_CMP		8
+#define ETMv4_MAX_CTXID_CMP		8
+#define ETM_MAX_VMID_CMP		8
+#define ETM_MAX_PE_CMP			8
+#define ETM_MAX_RES_SEL			32
+#define ETM_MAX_SS_CMP			8
+
+#define ETMv4_SYNC_MASK			0x1F
+#define ETM_CYC_THRESHOLD_MASK		0xFFF
+#define ETM_CYC_THRESHOLD_DEFAULT       0x100
+#define ETMv4_EVENT_MASK		0xFF
+#define ETM_CNTR_MAX_VAL		0xFFFF
+#define ETM_TRACEID_MASK		0x3f
+
+/* ETMv4 programming modes */
+#define ETM_MODE_EXCLUDE		BIT(0)
+#define ETM_MODE_LOAD			BIT(1)
+#define ETM_MODE_STORE			BIT(2)
+#define ETM_MODE_LOAD_STORE		BIT(3)
+#define ETM_MODE_BB			BIT(4)
+#define ETMv4_MODE_CYCACC		BIT(5)
+#define ETMv4_MODE_CTXID		BIT(6)
+#define ETM_MODE_VMID			BIT(7)
+#define ETM_MODE_COND(val)		BMVAL(val, 8, 10)
+#define ETMv4_MODE_TIMESTAMP		BIT(11)
+#define ETM_MODE_RETURNSTACK		BIT(12)
+#define ETM_MODE_QELEM(val)		BMVAL(val, 13, 14)
+#define ETM_MODE_DATA_TRACE_ADDR	BIT(15)
+#define ETM_MODE_DATA_TRACE_VAL		BIT(16)
+#define ETM_MODE_ISTALL			BIT(17)
+#define ETM_MODE_DSTALL			BIT(18)
+#define ETM_MODE_ATB_TRIGGER		BIT(19)
+#define ETM_MODE_LPOVERRIDE		BIT(20)
+#define ETM_MODE_ISTALL_EN		BIT(21)
+#define ETM_MODE_DSTALL_EN		BIT(22)
+#define ETM_MODE_INSTPRIO		BIT(23)
+#define ETM_MODE_NOOVERFLOW		BIT(24)
+#define ETM_MODE_TRACE_RESET		BIT(25)
+#define ETM_MODE_TRACE_ERR		BIT(26)
+#define ETM_MODE_VIEWINST_STARTSTOP	BIT(27)
+#define ETMv4_MODE_ALL			(GENMASK(27, 0) | \
+					 ETM_MODE_EXCL_KERN | \
+					 ETM_MODE_EXCL_USER)
+
+/*
+ * TRCOSLSR.OSLM advertises the OS Lock model.
+ * OSLM[2:0] = TRCOSLSR[4:3,0]
+ *
+ *	0b000 - Trace OS Lock is not implemented.
+ *	0b010 - Trace OS Lock is implemented.
+ *	0b100 - Trace OS Lock is not implemented, unit is controlled by PE OS Lock.
+ */
+#define ETM_OSLOCK_NI		0b000
+#define ETM_OSLOCK_PRESENT	0b010
+#define ETM_OSLOCK_PE		0b100
+
+#define ETM_OSLSR_OSLM(oslsr)	((((oslsr) & GENMASK(4, 3)) >> 2) | (oslsr & 0x1))
+
+/*
+ * TRCDEVARCH Bit field definitions
+ * Bits[31:21]	- ARCHITECT = Always Arm Ltd.
+ *                * Bits[31:28] = 0x4
+ *                * Bits[27:21] = 0b0111011
+ * Bit[20]	- PRESENT,  Indicates the presence of this register.
+ *
+ * Bit[19:16]	- REVISION, Revision of the architecture.
+ *
+ * Bit[15:0]	- ARCHID, Identifies this component as an ETM
+ *                * Bits[15:12] - architecture version of ETM
+ *                *             = 4 for ETMv4
+ *                * Bits[11:0] = 0xA13, architecture part number for ETM.
+ */
+#define ETM_DEVARCH_ARCHITECT_MASK		GENMASK(31, 21)
+#define ETM_DEVARCH_ARCHITECT_ARM		((0x4 << 28) | (0b0111011 << 21))
+#define ETM_DEVARCH_PRESENT			BIT(20)
+#define ETM_DEVARCH_REVISION_SHIFT		16
+#define ETM_DEVARCH_REVISION_MASK		GENMASK(19, 16)
+#define ETM_DEVARCH_REVISION(x)			\
+	(((x) & ETM_DEVARCH_REVISION_MASK) >> ETM_DEVARCH_REVISION_SHIFT)
+#define ETM_DEVARCH_ARCHID_MASK			GENMASK(15, 0)
+#define ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT	12
+#define ETM_DEVARCH_ARCHID_ARCH_VER_MASK	GENMASK(15, 12)
+#define ETM_DEVARCH_ARCHID_ARCH_VER(x)		\
+	(((x) & ETM_DEVARCH_ARCHID_ARCH_VER_MASK) >> ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT)
+
+#define ETM_DEVARCH_MAKE_ARCHID_ARCH_VER(ver)			\
+	(((ver) << ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT) & ETM_DEVARCH_ARCHID_ARCH_VER_MASK)
+
+#define ETM_DEVARCH_ARCHID_ARCH_PART(x)		((x) & 0xfffUL)
+
+#define ETM_DEVARCH_MAKE_ARCHID(major)			\
+	((ETM_DEVARCH_MAKE_ARCHID_ARCH_VER(major)) | ETM_DEVARCH_ARCHID_ARCH_PART(0xA13))
+
+#define ETM_DEVARCH_ARCHID_ETMv4x		ETM_DEVARCH_MAKE_ARCHID(0x4)
+#define ETM_DEVARCH_ARCHID_ETE			ETM_DEVARCH_MAKE_ARCHID(0x5)
+
+#define ETM_DEVARCH_ID_MASK						\
+	(ETM_DEVARCH_ARCHITECT_MASK | ETM_DEVARCH_ARCHID_MASK | ETM_DEVARCH_PRESENT)
+#define ETM_DEVARCH_ETMv4x_ARCH						\
+	(ETM_DEVARCH_ARCHITECT_ARM | ETM_DEVARCH_ARCHID_ETMv4x | ETM_DEVARCH_PRESENT)
+#define ETM_DEVARCH_ETE_ARCH						\
+	(ETM_DEVARCH_ARCHITECT_ARM | ETM_DEVARCH_ARCHID_ETE | ETM_DEVARCH_PRESENT)
+
+#define CS_DEVTYPE_PE_TRACE		0x00000013
+
+#define TRCSTATR_IDLE_BIT		0
+#define TRCSTATR_PMSTABLE_BIT		1
+#define ETM_DEFAULT_ADDR_COMP		0
+
+#define TRCSSCSRn_PC			BIT(3)
+
+/* PowerDown Control Register bits */
+#define TRCPDCR_PU			BIT(3)
+
+#define TRCACATR_EXLEVEL_SHIFT		8
+
+/*
+ * Exception level mask for Secure and Non-Secure ELs.
+ * ETM defines the bits for EL control (e.g, TRVICTLR, TRCACTRn).
+ * The Secure and Non-Secure ELs are always to gether.
+ * Non-secure EL3 is never implemented.
+ * We use the following generic mask as they appear in different
+ * registers and this can be shifted for the appropriate
+ * fields.
+ */
+#define ETM_EXLEVEL_S_APP		BIT(0)	/* Secure EL0		*/
+#define ETM_EXLEVEL_S_OS		BIT(1)	/* Secure EL1		*/
+#define ETM_EXLEVEL_S_HYP		BIT(2)	/* Secure EL2		*/
+#define ETM_EXLEVEL_S_MON		BIT(3)	/* Secure EL3/Monitor	*/
+#define ETM_EXLEVEL_NS_APP		BIT(4)	/* NonSecure EL0	*/
+#define ETM_EXLEVEL_NS_OS		BIT(5)	/* NonSecure EL1	*/
+#define ETM_EXLEVEL_NS_HYP		BIT(6)	/* NonSecure EL2	*/
+
+/* access level controls in TRCACATRn */
+#define TRCACATR_EXLEVEL_SHIFT		8
+
+#define ETM_TRCIDR1_ARCH_MAJOR_SHIFT	8
+#define ETM_TRCIDR1_ARCH_MAJOR_MASK	(0xfU << ETM_TRCIDR1_ARCH_MAJOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MAJOR(x)	\
+	(((x) & ETM_TRCIDR1_ARCH_MAJOR_MASK) >> ETM_TRCIDR1_ARCH_MAJOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MINOR_SHIFT	4
+#define ETM_TRCIDR1_ARCH_MINOR_MASK	(0xfU << ETM_TRCIDR1_ARCH_MINOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MINOR(x)	\
+	(((x) & ETM_TRCIDR1_ARCH_MINOR_MASK) >> ETM_TRCIDR1_ARCH_MINOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_SHIFT		ETM_TRCIDR1_ARCH_MINOR_SHIFT
+#define ETM_TRCIDR1_ARCH_MASK		\
+	(ETM_TRCIDR1_ARCH_MAJOR_MASK | ETM_TRCIDR1_ARCH_MINOR_MASK)
+
+#define ETM_TRCIDR1_ARCH_ETMv4		0x4
+
+/*
+ * Driver representation of the ETM architecture.
+ * The version of an ETM component can be detected from
+ *
+ * TRCDEVARCH	- CoreSight architected register
+ *                - Bits[15:12] - Major version
+ *                - Bits[19:16] - Minor version
+ *
+ * We must rely only on TRCDEVARCH for the version information. Even though,
+ * TRCIDR1 also provides the architecture version, it is a "Trace" register
+ * and as such must be accessed only with Trace power domain ON. This may
+ * not be available at probe time.
+ *
+ * Now to make certain decisions easier based on the version
+ * we use an internal representation of the version in the
+ * driver, as follows :
+ *
+ * ETM_ARCH_VERSION[7:0], where :
+ *      Bits[7:4] - Major version
+ *      Bits[3:0] - Minro version
+ */
+#define ETM_ARCH_VERSION(major, minor)		\
+	((((major) & 0xfU) << 4) | (((minor) & 0xfU)))
+#define ETM_ARCH_MAJOR_VERSION(arch)	(((arch) >> 4) & 0xfU)
+#define ETM_ARCH_MINOR_VERSION(arch)	((arch) & 0xfU)
+
+#define ETM_ARCH_V4	ETM_ARCH_VERSION(4, 0)
+#define ETM_ARCH_ETE	ETM_ARCH_VERSION(5, 0)
+
+/* Interpretation of resource numbers change at ETM v4.3 architecture */
+#define ETM_ARCH_V4_3	ETM_ARCH_VERSION(4, 3)
+
+static inline u8 etm_devarch_to_arch(u32 devarch)
+{
+	return ETM_ARCH_VERSION(ETM_DEVARCH_ARCHID_ARCH_VER(devarch),
+				ETM_DEVARCH_REVISION(devarch));
+}
+
+enum etm_impdef_type {
+	ETM4_IMPDEF_HISI_CORE_COMMIT,
+	ETM4_IMPDEF_FEATURE_MAX,
+};
+
+/**
+ * struct etmv4_config - configuration information related to an ETMv4
+ * @mode:	Controls various modes supported by this ETM.
+ * @pe_sel:	Controls which PE to trace.
+ * @cfg:	Controls the tracing options.
+ * @eventctrl0: Controls the tracing of arbitrary events.
+ * @eventctrl1: Controls the behavior of the events that @event_ctrl0 selects.
+ * @stallctl:	If functionality that prevents trace unit buffer overflows
+ *		is available.
+ * @ts_ctrl:	Controls the insertion of global timestamps in the
+ *		trace streams.
+ * @syncfreq:	Controls how often trace synchronization requests occur.
+ *		the TRCCCCTLR register.
+ * @ccctlr:	Sets the threshold value for cycle counting.
+ * @vinst_ctrl:	Controls instruction trace filtering.
+ * @viiectlr:	Set or read, the address range comparators.
+ * @vissctlr:	Set, or read, the single address comparators that control the
+ *		ViewInst start-stop logic.
+ * @vipcssctlr:	Set, or read, which PE comparator inputs can control the
+ *		ViewInst start-stop logic.
+ * @seq_idx:	Sequencor index selector.
+ * @seq_ctrl:	Control for the sequencer state transition control register.
+ * @seq_rst:	Moves the sequencer to state 0 when a programmed event occurs.
+ * @seq_state:	Set, or read the sequencer state.
+ * @cntr_idx:	Counter index seletor.
+ * @cntrldvr:	Sets or returns the reload count value for a counter.
+ * @cntr_ctrl:	Controls the operation of a counter.
+ * @cntr_val:	Sets or returns the value for a counter.
+ * @res_idx:	Resource index selector.
+ * @res_ctrl:	Controls the selection of the resources in the trace unit.
+ * @ss_idx:	Single-shot index selector.
+ * @ss_ctrl:	Controls the corresponding single-shot comparator resource.
+ * @ss_status:	The status of the corresponding single-shot comparator.
+ * @ss_pe_cmp:	Selects the PE comparator inputs for Single-shot control.
+ * @addr_idx:	Address comparator index selector.
+ * @addr_val:	Value for address comparator.
+ * @addr_acc:	Address comparator access type.
+ * @addr_type:	Current status of the comparator register.
+ * @ctxid_idx:	Context ID index selector.
+ * @ctxid_pid:	Value of the context ID comparator.
+ * @ctxid_mask0:Context ID comparator mask for comparator 0-3.
+ * @ctxid_mask1:Context ID comparator mask for comparator 4-7.
+ * @vmid_idx:	VM ID index selector.
+ * @vmid_val:	Value of the VM ID comparator.
+ * @vmid_mask0:	VM ID comparator mask for comparator 0-3.
+ * @vmid_mask1:	VM ID comparator mask for comparator 4-7.
+ * @ext_inp:	External input selection.
+ * @s_ex_level: Secure ELs where tracing is supported.
+ */
+struct etmv4_config {
+	u32				mode;
+	u32				pe_sel;
+	u32				cfg;
+	u32				eventctrl0;
+	u32				eventctrl1;
+	u32				stall_ctrl;
+	u32				ts_ctrl;
+	u32				syncfreq;
+	u32				ccctlr;
+	u32				bb_ctrl;
+	u32				vinst_ctrl;
+	u32				viiectlr;
+	u32				vissctlr;
+	u32				vipcssctlr;
+	u8				seq_idx;
+	u32				seq_ctrl[ETM_MAX_SEQ_STATES];
+	u32				seq_rst;
+	u32				seq_state;
+	u8				cntr_idx;
+	u32				cntrldvr[ETMv4_MAX_CNTR];
+	u32				cntr_ctrl[ETMv4_MAX_CNTR];
+	u32				cntr_val[ETMv4_MAX_CNTR];
+	u8				res_idx;
+	u32				res_ctrl[ETM_MAX_RES_SEL];
+	u8				ss_idx;
+	u32				ss_ctrl[ETM_MAX_SS_CMP];
+	u32				ss_status[ETM_MAX_SS_CMP];
+	u32				ss_pe_cmp[ETM_MAX_SS_CMP];
+	u8				addr_idx;
+	u64				addr_val[ETM_MAX_SINGLE_ADDR_CMP];
+	u64				addr_acc[ETM_MAX_SINGLE_ADDR_CMP];
+	u8				addr_type[ETM_MAX_SINGLE_ADDR_CMP];
+	u8				ctxid_idx;
+	u64				ctxid_pid[ETMv4_MAX_CTXID_CMP];
+	u32				ctxid_mask0;
+	u32				ctxid_mask1;
+	u8				vmid_idx;
+	u64				vmid_val[ETM_MAX_VMID_CMP];
+	u32				vmid_mask0;
+	u32				vmid_mask1;
+	u32				ext_inp;
+	u8				s_ex_level;
+};
+
+/**
+ * struct etm4_save_state - state to be preserved when ETM is without power
+ */
+struct etmv4_save_state {
+	u32	trcprgctlr;
+	u32	trcprocselr;
+	u32	trcconfigr;
+	u32	trcauxctlr;
+	u32	trceventctl0r;
+	u32	trceventctl1r;
+	u32	trcstallctlr;
+	u32	trctsctlr;
+	u32	trcsyncpr;
+	u32	trcccctlr;
+	u32	trcbbctlr;
+	u32	trctraceidr;
+	u32	trcqctlr;
+
+	u32	trcvictlr;
+	u32	trcviiectlr;
+	u32	trcvissctlr;
+	u32	trcvipcssctlr;
+	u32	trcvdctlr;
+	u32	trcvdsacctlr;
+	u32	trcvdarcctlr;
+
+	u32	trcseqevr[ETM_MAX_SEQ_STATES];
+	u32	trcseqrstevr;
+	u32	trcseqstr;
+	u32	trcextinselr;
+	u32	trccntrldvr[ETMv4_MAX_CNTR];
+	u32	trccntctlr[ETMv4_MAX_CNTR];
+	u32	trccntvr[ETMv4_MAX_CNTR];
+
+	u32	trcrsctlr[ETM_MAX_RES_SEL];
+
+	u32	trcssccr[ETM_MAX_SS_CMP];
+	u32	trcsscsr[ETM_MAX_SS_CMP];
+	u32	trcsspcicr[ETM_MAX_SS_CMP];
+
+	u64	trcacvr[ETM_MAX_SINGLE_ADDR_CMP];
+	u64	trcacatr[ETM_MAX_SINGLE_ADDR_CMP];
+	u64	trccidcvr[ETMv4_MAX_CTXID_CMP];
+	u64	trcvmidcvr[ETM_MAX_VMID_CMP];
+	u32	trccidcctlr0;
+	u32	trccidcctlr1;
+	u32	trcvmidcctlr0;
+	u32	trcvmidcctlr1;
+
+	u32	trcclaimset;
+
+	u32	cntr_val[ETMv4_MAX_CNTR];
+	u32	seq_state;
+	u32	vinst_ctrl;
+	u32	ss_status[ETM_MAX_SS_CMP];
+
+	u32	trcpdcr;
+};
+
+/**
+ * struct etm4_drvdata - specifics associated to an ETM component
+ * @pclk        APB clock if present, otherwise NULL
+ * @base:       Memory mapped base address for this component.
+ * @csdev:      Component vitals needed by the framework.
+ * @spinlock:   Only one at a time pls.
+ * @mode:	This tracer's mode, i.e sysFS, Perf or disabled.
+ * @cpu:        The cpu this component is affined to.
+ * @arch:       ETM architecture version.
+ * @nr_pe:	The number of processing entity available for tracing.
+ * @nr_pe_cmp:	The number of processing entity comparator inputs that are
+ *		available for tracing.
+ * @nr_addr_cmp:Number of pairs of address comparators available
+ *		as found in ETMIDR4 0-3.
+ * @nr_cntr:    Number of counters as found in ETMIDR5 bit 28-30.
+ * @nr_ext_inp: Number of external input.
+ * @numcidc:	Number of contextID comparators.
+ * @numvmidc:	Number of VMID comparators.
+ * @nrseqstate: The number of sequencer states that are implemented.
+ * @nr_event:	Indicates how many events the trace unit support.
+ * @nr_resource:The number of resource selection pairs available for tracing.
+ * @nr_ss_cmp:	Number of single-shot comparator controls that are available.
+ * @trcid:	value of the current ID for this component.
+ * @trcid_size: Indicates the trace ID width.
+ * @ts_size:	Global timestamp size field.
+ * @ctxid_size:	Size of the context ID field to consider.
+ * @vmid_size:	Size of the VM ID comparator to consider.
+ * @ccsize:	Indicates the size of the cycle counter in bits.
+ * @ccitmin:	minimum value that can be programmed in
+ * @s_ex_level:	In secure state, indicates whether instruction tracing is
+ *		supported for the corresponding Exception level.
+ * @ns_ex_level:In non-secure state, indicates whether instruction tracing is
+ *		supported for the corresponding Exception level.
+ * @sticky_enable: true if ETM base configuration has been done.
+ * @boot_enable:True if we should start tracing at boot time.
+ * @os_unlock:  True if access to management registers is allowed.
+ * @instrp0:	Tracing of load and store instructions
+ *		as P0 elements is supported.
+ * @trcbb:	Indicates if the trace unit supports branch broadcast tracing.
+ * @trccond:	If the trace unit supports conditional
+ *		instruction tracing.
+ * @retstack:	Indicates if the implementation supports a return stack.
+ * @trccci:	Indicates if the trace unit supports cycle counting
+ *		for instruction.
+ * @q_support:	Q element support characteristics.
+ * @trc_error:	Whether a trace unit can trace a system
+ *		error exception.
+ * @syncpr:	Indicates if an implementation has a fixed
+ *		synchronization period.
+ * @stall_ctrl:	Enables trace unit functionality that prevents trace
+ *		unit buffer overflows.
+ * @sysstall:	Does the system support stall control of the PE?
+ * @nooverflow:	Indicate if overflow prevention is supported.
+ * @atbtrig:	If the implementation can support ATB triggers
+ * @lpoverride:	If the implementation can support low-power state over.
+ * @trfcr:	If the CPU supports FEAT_TRF, value of the TRFCR_ELx that
+ *		allows tracing at all ELs. We don't want to compute this
+ *		at runtime, due to the additional setting of TRFCR_CX when
+ *		in EL2. Otherwise, 0.
+ * @config:	structure holding configuration parameters.
+ * @save_trfcr:	Saved TRFCR_EL1 register during a CPU PM event.
+ * @save_state:	State to be preserved across power loss
+ * @state_needs_restore: True when there is context to restore after PM exit
+ * @skip_power_up: Indicates if an implementation can skip powering up
+ *		   the trace unit.
+ * @arch_features: Bitmap of arch features of etmv4 devices.
+ */
+struct etmv4_drvdata {
+	struct clk			*pclk;
+	void __iomem			*base;
+	struct coresight_device		*csdev;
+	spinlock_t			spinlock;
+	local_t				mode;
+	int				cpu;
+	u8				arch;
+	u8				nr_pe;
+	u8				nr_pe_cmp;
+	u8				nr_addr_cmp;
+	u8				nr_cntr;
+	u8				nr_ext_inp;
+	u8				numcidc;
+	u8				numvmidc;
+	u8				nrseqstate;
+	u8				nr_event;
+	u8				nr_resource;
+	u8				nr_ss_cmp;
+	u8				trcid;
+	u8				trcid_size;
+	u8				ts_size;
+	u8				ctxid_size;
+	u8				vmid_size;
+	u8				ccsize;
+	u16				ccitmin;
+	u8				s_ex_level;
+	u8				ns_ex_level;
+	u8				q_support;
+	u8				os_lock_model;
+	bool				sticky_enable;
+	bool				boot_enable;
+	bool				os_unlock;
+	bool				instrp0;
+	bool				trcbb;
+	bool				trccond;
+	bool				retstack;
+	bool				trccci;
+	bool				trc_error;
+	bool				syncpr;
+	bool				stallctl;
+	bool				sysstall;
+	bool				nooverflow;
+	bool				atbtrig;
+	bool				lpoverride;
+	u64				trfcr;
+	struct etmv4_config		config;
+	u64				save_trfcr;
+	struct etmv4_save_state		*save_state;
+	bool				state_needs_restore;
+	bool				skip_power_up;
+	DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX);
+};
+
+/* Address comparator access types */
+enum etm_addr_acctype {
+	TRCACATRn_TYPE_ADDR,
+	TRCACATRn_TYPE_DATA_LOAD_ADDR,
+	TRCACATRn_TYPE_DATA_STORE_ADDR,
+	TRCACATRn_TYPE_DATA_LOAD_STORE_ADDR,
+};
+
+/* Address comparator context types */
+enum etm_addr_ctxtype {
+	ETM_CTX_NONE,
+	ETM_CTX_CTXID,
+	ETM_CTX_VMID,
+	ETM_CTX_CTXID_VMID,
+};
+
+extern const struct attribute_group *coresight_etmv4_groups[];
+void etm4_config_trace_mode(struct etmv4_config *config);
+
+u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
+void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit);
+
+static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata)
+{
+	return drvdata->arch >= ETM_ARCH_ETE;
+}
+
+int etm4_read_alloc_trace_id(struct etmv4_drvdata *drvdata);
+void etm4_release_trace_id(struct etmv4_drvdata *drvdata);
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
new file mode 100644
index 0000000000..b8e150e45b
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Funnel driver
+ */
+
+#include <linux/acpi.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
+
+#include "coresight-priv.h"
+
+#define FUNNEL_FUNCTL		0x000
+#define FUNNEL_PRICTL		0x004
+
+#define FUNNEL_HOLDTIME_MASK	0xf00
+#define FUNNEL_HOLDTIME_SHFT	0x8
+#define FUNNEL_HOLDTIME		(0x7 << FUNNEL_HOLDTIME_SHFT)
+#define FUNNEL_ENSx_MASK	0xff
+
+DEFINE_CORESIGHT_DEVLIST(funnel_devs, "funnel");
+
+/**
+ * struct funnel_drvdata - specifics associated to a funnel component
+ * @base:	memory mapped base address for this component.
+ * @atclk:	optional clock for the core parts of the funnel.
+ * @csdev:	component vitals needed by the framework.
+ * @priority:	port selection order.
+ * @spinlock:	serialize enable/disable operations.
+ */
+struct funnel_drvdata {
+	void __iomem		*base;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+	unsigned long		priority;
+	spinlock_t		spinlock;
+};
+
+static int dynamic_funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
+{
+	u32 functl;
+	int rc = 0;
+	struct coresight_device *csdev = drvdata->csdev;
+
+	CS_UNLOCK(drvdata->base);
+
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	/* Claim the device only when we enable the first slave */
+	if (!(functl & FUNNEL_ENSx_MASK)) {
+		rc = coresight_claim_device_unlocked(csdev);
+		if (rc)
+			goto done;
+	}
+
+	functl &= ~FUNNEL_HOLDTIME_MASK;
+	functl |= FUNNEL_HOLDTIME;
+	functl |= (1 << port);
+	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
+	writel_relaxed(drvdata->priority, drvdata->base + FUNNEL_PRICTL);
+done:
+	CS_LOCK(drvdata->base);
+	return rc;
+}
+
+static int funnel_enable(struct coresight_device *csdev,
+			 struct coresight_connection *in,
+			 struct coresight_connection *out)
+{
+	int rc = 0;
+	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	unsigned long flags;
+	bool first_enable = false;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (atomic_read(&in->dest_refcnt) == 0) {
+		if (drvdata->base)
+			rc = dynamic_funnel_enable_hw(drvdata, in->dest_port);
+		if (!rc)
+			first_enable = true;
+	}
+	if (!rc)
+		atomic_inc(&in->dest_refcnt);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	if (first_enable)
+		dev_dbg(&csdev->dev, "FUNNEL inport %d enabled\n",
+			in->dest_port);
+	return rc;
+}
+
+static void dynamic_funnel_disable_hw(struct funnel_drvdata *drvdata,
+				      int inport)
+{
+	u32 functl;
+	struct coresight_device *csdev = drvdata->csdev;
+
+	CS_UNLOCK(drvdata->base);
+
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	functl &= ~(1 << inport);
+	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
+
+	/* Disclaim the device if none of the slaves are now active */
+	if (!(functl & FUNNEL_ENSx_MASK))
+		coresight_disclaim_device_unlocked(csdev);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void funnel_disable(struct coresight_device *csdev,
+			   struct coresight_connection *in,
+			   struct coresight_connection *out)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	unsigned long flags;
+	bool last_disable = false;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (atomic_dec_return(&in->dest_refcnt) == 0) {
+		if (drvdata->base)
+			dynamic_funnel_disable_hw(drvdata, in->dest_port);
+		last_disable = true;
+	}
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	if (last_disable)
+		dev_dbg(&csdev->dev, "FUNNEL inport %d disabled\n",
+			in->dest_port);
+}
+
+static const struct coresight_ops_link funnel_link_ops = {
+	.enable		= funnel_enable,
+	.disable	= funnel_disable,
+};
+
+static const struct coresight_ops funnel_cs_ops = {
+	.link_ops	= &funnel_link_ops,
+};
+
+static ssize_t priority_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->priority;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t priority_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->priority = val;
+	return size;
+}
+static DEVICE_ATTR_RW(priority);
+
+static u32 get_funnel_ctrl_hw(struct funnel_drvdata *drvdata)
+{
+	u32 functl;
+
+	CS_UNLOCK(drvdata->base);
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	CS_LOCK(drvdata->base);
+
+	return functl;
+}
+
+static ssize_t funnel_ctrl_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	u32 val;
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	pm_runtime_get_sync(dev->parent);
+
+	val = get_funnel_ctrl_hw(drvdata);
+
+	pm_runtime_put(dev->parent);
+
+	return sprintf(buf, "%#x\n", val);
+}
+static DEVICE_ATTR_RO(funnel_ctrl);
+
+static struct attribute *coresight_funnel_attrs[] = {
+	&dev_attr_funnel_ctrl.attr,
+	&dev_attr_priority.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_funnel);
+
+static int funnel_probe(struct device *dev, struct resource *res)
+{
+	int ret;
+	void __iomem *base;
+	struct coresight_platform_data *pdata = NULL;
+	struct funnel_drvdata *drvdata;
+	struct coresight_desc desc = { 0 };
+
+	if (is_of_node(dev_fwnode(dev)) &&
+	    of_device_is_compatible(dev->of_node, "arm,coresight-funnel"))
+		dev_warn_once(dev, "Uses OBSOLETE CoreSight funnel binding\n");
+
+	desc.name = coresight_alloc_device_name(&funnel_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->atclk = devm_clk_get(dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Map the device base for dynamic-funnel, which has been
+	 * validated by AMBA core.
+	 */
+	if (res) {
+		base = devm_ioremap_resource(dev, res);
+		if (IS_ERR(base)) {
+			ret = PTR_ERR(base);
+			goto out_disable_clk;
+		}
+		drvdata->base = base;
+		desc.groups = coresight_funnel_groups;
+		desc.access = CSDEV_ACCESS_IOMEM(base);
+	}
+
+	dev_set_drvdata(dev, drvdata);
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto out_disable_clk;
+	}
+	dev->platform_data = pdata;
+
+	spin_lock_init(&drvdata->spinlock);
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
+	desc.ops = &funnel_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto out_disable_clk;
+	}
+
+	pm_runtime_put(dev);
+	ret = 0;
+
+out_disable_clk:
+	if (ret && !IS_ERR_OR_NULL(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+	return ret;
+}
+
+static int funnel_remove(struct device *dev)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev);
+
+	coresight_unregister(drvdata->csdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int funnel_runtime_suspend(struct device *dev)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int funnel_runtime_resume(struct device *dev)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops funnel_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(funnel_runtime_suspend, funnel_runtime_resume, NULL)
+};
+
+static int static_funnel_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	/* Static funnel do not have programming base */
+	ret = funnel_probe(&pdev->dev, NULL);
+
+	if (ret) {
+		pm_runtime_put_noidle(&pdev->dev);
+		pm_runtime_disable(&pdev->dev);
+	}
+
+	return ret;
+}
+
+static int static_funnel_remove(struct platform_device *pdev)
+{
+	funnel_remove(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+static const struct of_device_id static_funnel_match[] = {
+	{.compatible = "arm,coresight-static-funnel"},
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, static_funnel_match);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id static_funnel_ids[] = {
+	{"ARMHC9FE", 0},
+	{},
+};
+
+MODULE_DEVICE_TABLE(acpi, static_funnel_ids);
+#endif
+
+static struct platform_driver static_funnel_driver = {
+	.probe          = static_funnel_probe,
+	.remove          = static_funnel_remove,
+	.driver         = {
+		.name   = "coresight-static-funnel",
+		/* THIS_MODULE is taken care of by platform_driver_register() */
+		.of_match_table = static_funnel_match,
+		.acpi_match_table = ACPI_PTR(static_funnel_ids),
+		.pm	= &funnel_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+};
+
+static int dynamic_funnel_probe(struct amba_device *adev,
+				const struct amba_id *id)
+{
+	return funnel_probe(&adev->dev, &adev->res);
+}
+
+static void dynamic_funnel_remove(struct amba_device *adev)
+{
+	funnel_remove(&adev->dev);
+}
+
+static const struct amba_id dynamic_funnel_ids[] = {
+	{
+		.id     = 0x000bb908,
+		.mask   = 0x000fffff,
+	},
+	{
+		/* Coresight SoC-600 */
+		.id     = 0x000bb9eb,
+		.mask   = 0x000fffff,
+	},
+	{ 0, 0},
+};
+
+MODULE_DEVICE_TABLE(amba, dynamic_funnel_ids);
+
+static struct amba_driver dynamic_funnel_driver = {
+	.drv = {
+		.name	= "coresight-dynamic-funnel",
+		.owner	= THIS_MODULE,
+		.pm	= &funnel_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= dynamic_funnel_probe,
+	.remove		= dynamic_funnel_remove,
+	.id_table	= dynamic_funnel_ids,
+};
+
+static int __init funnel_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&static_funnel_driver);
+	if (ret) {
+		pr_info("Error registering platform driver\n");
+		return ret;
+	}
+
+	ret = amba_driver_register(&dynamic_funnel_driver);
+	if (ret) {
+		pr_info("Error registering amba driver\n");
+		platform_driver_unregister(&static_funnel_driver);
+	}
+
+	return ret;
+}
+
+static void __exit funnel_exit(void)
+{
+	platform_driver_unregister(&static_funnel_driver);
+	amba_driver_unregister(&dynamic_funnel_driver);
+}
+
+module_init(funnel_init);
+module_exit(funnel_exit);
+
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_AUTHOR("Mathieu Poirier <mathieu.poirier@linaro.org>");
+MODULE_DESCRIPTION("Arm CoreSight Funnel Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c
new file mode 100644
index 0000000000..9d550f5697
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-platform.c
@@ -0,0 +1,826 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_graph.h>
+#include <linux/platform_device.h>
+#include <linux/amba/bus.h>
+#include <linux/coresight.h>
+#include <linux/cpumask.h>
+#include <asm/smp_plat.h>
+
+#include "coresight-priv.h"
+
+/*
+ * Add an entry to the connection list and assign @conn's contents to it.
+ *
+ * If the output port is already assigned on this device, return -EINVAL
+ */
+struct coresight_connection *
+coresight_add_out_conn(struct device *dev,
+		       struct coresight_platform_data *pdata,
+		       const struct coresight_connection *new_conn)
+{
+	int i;
+	struct coresight_connection *conn;
+
+	/*
+	 * Warn on any existing duplicate output port.
+	 */
+	for (i = 0; i < pdata->nr_outconns; ++i) {
+		conn = pdata->out_conns[i];
+		/* Output == -1 means ignore the port for example for helpers */
+		if (conn->src_port != -1 &&
+		    conn->src_port == new_conn->src_port) {
+			dev_warn(dev, "Duplicate output port %d\n",
+				 conn->src_port);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	pdata->nr_outconns++;
+	pdata->out_conns =
+		devm_krealloc_array(dev, pdata->out_conns, pdata->nr_outconns,
+				    sizeof(*pdata->out_conns), GFP_KERNEL);
+	if (!pdata->out_conns)
+		return ERR_PTR(-ENOMEM);
+
+	conn = devm_kmalloc(dev, sizeof(struct coresight_connection),
+			    GFP_KERNEL);
+	if (!conn)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Copy the new connection into the allocation, save the pointer to the
+	 * end of the connection array and also return it in case it needs to be
+	 * used right away.
+	 */
+	*conn = *new_conn;
+	pdata->out_conns[pdata->nr_outconns - 1] = conn;
+	return conn;
+}
+EXPORT_SYMBOL_GPL(coresight_add_out_conn);
+
+/*
+ * Add an input connection reference to @out_conn in the target's in_conns array
+ *
+ * @out_conn: Existing output connection to store as an input on the
+ *	      connection's remote device.
+ */
+int coresight_add_in_conn(struct coresight_connection *out_conn)
+{
+	int i;
+	struct device *dev = out_conn->dest_dev->dev.parent;
+	struct coresight_platform_data *pdata = out_conn->dest_dev->pdata;
+
+	for (i = 0; i < pdata->nr_inconns; ++i)
+		if (!pdata->in_conns[i]) {
+			pdata->in_conns[i] = out_conn;
+			return 0;
+		}
+
+	pdata->nr_inconns++;
+	pdata->in_conns =
+		devm_krealloc_array(dev, pdata->in_conns, pdata->nr_inconns,
+				    sizeof(*pdata->in_conns), GFP_KERNEL);
+	if (!pdata->in_conns)
+		return -ENOMEM;
+	pdata->in_conns[pdata->nr_inconns - 1] = out_conn;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(coresight_add_in_conn);
+
+static struct device *
+coresight_find_device_by_fwnode(struct fwnode_handle *fwnode)
+{
+	struct device *dev = NULL;
+
+	/*
+	 * If we have a non-configurable replicator, it will be found on the
+	 * platform bus.
+	 */
+	dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
+	if (dev)
+		return dev;
+
+	/*
+	 * We have a configurable component - circle through the AMBA bus
+	 * looking for the device that matches the endpoint node.
+	 */
+	return bus_find_device_by_fwnode(&amba_bustype, fwnode);
+}
+
+/*
+ * Find a registered coresight device from a device fwnode.
+ * The node info is associated with the AMBA parent, but the
+ * csdev keeps a copy so iterate round the coresight bus to
+ * find the device.
+ */
+struct coresight_device *
+coresight_find_csdev_by_fwnode(struct fwnode_handle *r_fwnode)
+{
+	struct device *dev;
+	struct coresight_device *csdev = NULL;
+
+	dev = bus_find_device_by_fwnode(&coresight_bustype, r_fwnode);
+	if (dev) {
+		csdev = to_coresight_device(dev);
+		put_device(dev);
+	}
+	return csdev;
+}
+EXPORT_SYMBOL_GPL(coresight_find_csdev_by_fwnode);
+
+#ifdef CONFIG_OF
+static inline bool of_coresight_legacy_ep_is_input(struct device_node *ep)
+{
+	return of_property_read_bool(ep, "slave-mode");
+}
+
+static struct device_node *of_coresight_get_port_parent(struct device_node *ep)
+{
+	struct device_node *parent = of_graph_get_port_parent(ep);
+
+	/*
+	 * Skip one-level up to the real device node, if we
+	 * are using the new bindings.
+	 */
+	if (of_node_name_eq(parent, "in-ports") ||
+	    of_node_name_eq(parent, "out-ports"))
+		parent = of_get_next_parent(parent);
+
+	return parent;
+}
+
+static inline struct device_node *
+of_coresight_get_output_ports_node(const struct device_node *node)
+{
+	return of_get_child_by_name(node, "out-ports");
+}
+
+static int of_coresight_get_cpu(struct device *dev)
+{
+	int cpu;
+	struct device_node *dn;
+
+	if (!dev->of_node)
+		return -ENODEV;
+
+	dn = of_parse_phandle(dev->of_node, "cpu", 0);
+	if (!dn)
+		return -ENODEV;
+
+	cpu = of_cpu_node_to_id(dn);
+	of_node_put(dn);
+
+	return cpu;
+}
+
+/*
+ * of_coresight_parse_endpoint : Parse the given output endpoint @ep
+ * and fill the connection information in @pdata->out_conns
+ *
+ * Parses the local port, remote device name and the remote port.
+ *
+ * Returns :
+ *	 0	- If the parsing completed without any fatal errors.
+ *	-Errno	- Fatal error, abort the scanning.
+ */
+static int of_coresight_parse_endpoint(struct device *dev,
+				       struct device_node *ep,
+				       struct coresight_platform_data *pdata)
+{
+	int ret = 0;
+	struct of_endpoint endpoint, rendpoint;
+	struct device_node *rparent = NULL;
+	struct device_node *rep = NULL;
+	struct device *rdev = NULL;
+	struct fwnode_handle *rdev_fwnode;
+	struct coresight_connection conn = {};
+	struct coresight_connection *new_conn;
+
+	do {
+		/* Parse the local port details */
+		if (of_graph_parse_endpoint(ep, &endpoint))
+			break;
+		/*
+		 * Get a handle on the remote endpoint and the device it is
+		 * attached to.
+		 */
+		rep = of_graph_get_remote_endpoint(ep);
+		if (!rep)
+			break;
+		rparent = of_coresight_get_port_parent(rep);
+		if (!rparent)
+			break;
+		if (of_graph_parse_endpoint(rep, &rendpoint))
+			break;
+
+		rdev_fwnode = of_fwnode_handle(rparent);
+		/* If the remote device is not available, defer probing */
+		rdev = coresight_find_device_by_fwnode(rdev_fwnode);
+		if (!rdev) {
+			ret = -EPROBE_DEFER;
+			break;
+		}
+
+		conn.src_port = endpoint.port;
+		/*
+		 * Hold the refcount to the target device. This could be
+		 * released via:
+		 * 1) coresight_release_platform_data() if the probe fails or
+		 *    this device is unregistered.
+		 * 2) While removing the target device via
+		 *    coresight_remove_match()
+		 */
+		conn.dest_fwnode = fwnode_handle_get(rdev_fwnode);
+		conn.dest_port = rendpoint.port;
+
+		new_conn = coresight_add_out_conn(dev, pdata, &conn);
+		if (IS_ERR_VALUE(new_conn)) {
+			fwnode_handle_put(conn.dest_fwnode);
+			return PTR_ERR(new_conn);
+		}
+		/* Connection record updated */
+	} while (0);
+
+	of_node_put(rparent);
+	of_node_put(rep);
+	put_device(rdev);
+
+	return ret;
+}
+
+static int of_get_coresight_platform_data(struct device *dev,
+					  struct coresight_platform_data *pdata)
+{
+	int ret = 0;
+	struct device_node *ep = NULL;
+	const struct device_node *parent = NULL;
+	bool legacy_binding = false;
+	struct device_node *node = dev->of_node;
+
+	parent = of_coresight_get_output_ports_node(node);
+	/*
+	 * If the DT uses obsoleted bindings, the ports are listed
+	 * under the device and we need to filter out the input
+	 * ports.
+	 */
+	if (!parent) {
+		/*
+		 * Avoid warnings in of_graph_get_next_endpoint()
+		 * if the device doesn't have any graph connections
+		 */
+		if (!of_graph_is_present(node))
+			return 0;
+		legacy_binding = true;
+		parent = node;
+		dev_warn_once(dev, "Uses obsolete Coresight DT bindings\n");
+	}
+
+	/* Iterate through each output port to discover topology */
+	while ((ep = of_graph_get_next_endpoint(parent, ep))) {
+		/*
+		 * Legacy binding mixes input/output ports under the
+		 * same parent. So, skip the input ports if we are dealing
+		 * with legacy binding, as they processed with their
+		 * connected output ports.
+		 */
+		if (legacy_binding && of_coresight_legacy_ep_is_input(ep))
+			continue;
+
+		ret = of_coresight_parse_endpoint(dev, ep, pdata);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+#else
+static inline int
+of_get_coresight_platform_data(struct device *dev,
+			       struct coresight_platform_data *pdata)
+{
+	return -ENOENT;
+}
+
+static inline int of_coresight_get_cpu(struct device *dev)
+{
+	return -ENODEV;
+}
+#endif
+
+#ifdef CONFIG_ACPI
+
+#include <acpi/actypes.h>
+#include <acpi/processor.h>
+
+/* ACPI Graph _DSD UUID : "ab02a46b-74c7-45a2-bd68-f7d344ef2153" */
+static const guid_t acpi_graph_uuid = GUID_INIT(0xab02a46b, 0x74c7, 0x45a2,
+						0xbd, 0x68, 0xf7, 0xd3,
+						0x44, 0xef, 0x21, 0x53);
+/* Coresight ACPI Graph UUID : "3ecbc8b6-1d0e-4fb3-8107-e627f805c6cd" */
+static const guid_t coresight_graph_uuid = GUID_INIT(0x3ecbc8b6, 0x1d0e, 0x4fb3,
+						     0x81, 0x07, 0xe6, 0x27,
+						     0xf8, 0x05, 0xc6, 0xcd);
+#define ACPI_CORESIGHT_LINK_SLAVE	0
+#define ACPI_CORESIGHT_LINK_MASTER	1
+
+static inline bool is_acpi_guid(const union acpi_object *obj)
+{
+	return (obj->type == ACPI_TYPE_BUFFER) && (obj->buffer.length == 16);
+}
+
+/*
+ * acpi_guid_matches	- Checks if the given object is a GUID object and
+ * that it matches the supplied the GUID.
+ */
+static inline bool acpi_guid_matches(const union acpi_object *obj,
+				   const guid_t *guid)
+{
+	return is_acpi_guid(obj) &&
+	       guid_equal((guid_t *)obj->buffer.pointer, guid);
+}
+
+static inline bool is_acpi_dsd_graph_guid(const union acpi_object *obj)
+{
+	return acpi_guid_matches(obj, &acpi_graph_uuid);
+}
+
+static inline bool is_acpi_coresight_graph_guid(const union acpi_object *obj)
+{
+	return acpi_guid_matches(obj, &coresight_graph_uuid);
+}
+
+static inline bool is_acpi_coresight_graph(const union acpi_object *obj)
+{
+	const union acpi_object *graphid, *guid, *links;
+
+	if (obj->type != ACPI_TYPE_PACKAGE ||
+	    obj->package.count < 3)
+		return false;
+
+	graphid = &obj->package.elements[0];
+	guid = &obj->package.elements[1];
+	links = &obj->package.elements[2];
+
+	if (graphid->type != ACPI_TYPE_INTEGER ||
+	    links->type != ACPI_TYPE_INTEGER)
+		return false;
+
+	return is_acpi_coresight_graph_guid(guid);
+}
+
+/*
+ * acpi_validate_dsd_graph	- Make sure the given _DSD graph conforms
+ * to the ACPI _DSD Graph specification.
+ *
+ * ACPI Devices Graph property has the following format:
+ *  {
+ *	Revision	- Integer, must be 0
+ *	NumberOfGraphs	- Integer, N indicating the following list.
+ *	Graph[1],
+ *	 ...
+ *	Graph[N]
+ *  }
+ *
+ * And each Graph entry has the following format:
+ *  {
+ *	GraphID		- Integer, identifying a graph the device belongs to.
+ *	UUID		- UUID identifying the specification that governs
+ *			  this graph. (e.g, see is_acpi_coresight_graph())
+ *	NumberOfLinks	- Number "N" of connections on this node of the graph.
+ *	Links[1]
+ *	...
+ *	Links[N]
+ *  }
+ *
+ * Where each "Links" entry has the following format:
+ *
+ * {
+ *	SourcePortAddress	- Integer
+ *	DestinationPortAddress	- Integer
+ *	DestinationDeviceName	- Reference to another device
+ *	( --- CoreSight specific extensions below ---)
+ *	DirectionOfFlow		- Integer 1 for output(master)
+ *				  0 for input(slave)
+ * }
+ *
+ * e.g:
+ * For a Funnel device
+ *
+ * Device(MFUN) {
+ *   ...
+ *
+ *   Name (_DSD, Package() {
+ *	// DSD Package contains tuples of {  Proeprty_Type_UUID, Package() }
+ *	ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), //Std. Property UUID
+ *	Package() {
+ *		Package(2) { "property-name", <property-value> }
+ *	},
+ *
+ *	ToUUID("ab02a46b-74c7-45a2-bd68-f7d344ef2153"), // ACPI Graph UUID
+ *	Package() {
+ *	  0,		// Revision
+ *	  1,		// NumberOfGraphs.
+ *	  Package() {	// Graph[0] Package
+ *	     1,		// GraphID
+ *	     // Coresight Graph UUID
+ *	     ToUUID("3ecbc8b6-1d0e-4fb3-8107-e627f805c6cd"),
+ *	     3,		// NumberOfLinks aka ports
+ *	     // Link[0]: Output_0 -> Replicator:Input_0
+ *	     Package () { 0, 0, \_SB_.RPL0, 1 },
+ *	     // Link[1]: Input_0 <- Cluster0_Funnel0:Output_0
+ *	     Package () { 0, 0, \_SB_.CLU0.FUN0, 0 },
+ *	     // Link[2]: Input_1 <- Cluster1_Funnel0:Output_0
+ *	      Package () { 1, 0, \_SB_.CLU1.FUN0, 0 },
+ *	  }	// End of Graph[0] Package
+ *
+ *	}, // End of ACPI Graph Property
+ *  })
+ */
+static inline bool acpi_validate_dsd_graph(const union acpi_object *graph)
+{
+	int i, n;
+	const union acpi_object *rev, *nr_graphs;
+
+	/* The graph must contain at least the Revision and Number of Graphs */
+	if (graph->package.count < 2)
+		return false;
+
+	rev = &graph->package.elements[0];
+	nr_graphs = &graph->package.elements[1];
+
+	if (rev->type != ACPI_TYPE_INTEGER ||
+	    nr_graphs->type != ACPI_TYPE_INTEGER)
+		return false;
+
+	/* We only support revision 0 */
+	if (rev->integer.value != 0)
+		return false;
+
+	n = nr_graphs->integer.value;
+	/* CoreSight devices are only part of a single Graph */
+	if (n != 1)
+		return false;
+
+	/* Make sure the ACPI graph package has right number of elements */
+	if (graph->package.count != (n + 2))
+		return false;
+
+	/*
+	 * Each entry must be a graph package with at least 3 members :
+	 * { GraphID, UUID, NumberOfLinks(n), Links[.],... }
+	 */
+	for (i = 2; i < n + 2; i++) {
+		const union acpi_object *obj = &graph->package.elements[i];
+
+		if (obj->type != ACPI_TYPE_PACKAGE ||
+		    obj->package.count < 3)
+			return false;
+	}
+
+	return true;
+}
+
+/* acpi_get_dsd_graph	- Find the _DSD Graph property for the given device. */
+static const union acpi_object *
+acpi_get_dsd_graph(struct acpi_device *adev, struct acpi_buffer *buf)
+{
+	int i;
+	acpi_status status;
+	const union acpi_object *dsd;
+
+	status = acpi_evaluate_object_typed(adev->handle, "_DSD", NULL,
+					    buf, ACPI_TYPE_PACKAGE);
+	if (ACPI_FAILURE(status))
+		return NULL;
+
+	dsd = buf->pointer;
+
+	/*
+	 * _DSD property consists tuples { Prop_UUID, Package() }
+	 * Iterate through all the packages and find the Graph.
+	 */
+	for (i = 0; i + 1 < dsd->package.count; i += 2) {
+		const union acpi_object *guid, *package;
+
+		guid = &dsd->package.elements[i];
+		package = &dsd->package.elements[i + 1];
+
+		/* All _DSD elements must have a UUID and a Package */
+		if (!is_acpi_guid(guid) || package->type != ACPI_TYPE_PACKAGE)
+			break;
+		/* Skip the non-Graph _DSD packages */
+		if (!is_acpi_dsd_graph_guid(guid))
+			continue;
+		if (acpi_validate_dsd_graph(package))
+			return package;
+		/* Invalid graph format, continue */
+		dev_warn(&adev->dev, "Invalid Graph _DSD property\n");
+	}
+
+	return NULL;
+}
+
+static inline bool
+acpi_validate_coresight_graph(const union acpi_object *cs_graph)
+{
+	int nlinks;
+
+	nlinks = cs_graph->package.elements[2].integer.value;
+	/*
+	 * Graph must have the following fields :
+	 * { GraphID, GraphUUID, NumberOfLinks, Links... }
+	 */
+	if (cs_graph->package.count != (nlinks + 3))
+		return false;
+	/* The links are validated in acpi_coresight_parse_link() */
+	return true;
+}
+
+/*
+ * acpi_get_coresight_graph	- Parse the device _DSD tables and find
+ * the Graph property matching the CoreSight Graphs.
+ *
+ * Returns the pointer to the CoreSight Graph Package when found. Otherwise
+ * returns NULL.
+ */
+static const union acpi_object *
+acpi_get_coresight_graph(struct acpi_device *adev, struct acpi_buffer *buf)
+{
+	const union acpi_object *graph_list, *graph;
+	int i, nr_graphs;
+
+	graph_list = acpi_get_dsd_graph(adev, buf);
+	if (!graph_list)
+		return graph_list;
+
+	nr_graphs = graph_list->package.elements[1].integer.value;
+
+	for (i = 2; i < nr_graphs + 2; i++) {
+		graph = &graph_list->package.elements[i];
+		if (!is_acpi_coresight_graph(graph))
+			continue;
+		if (acpi_validate_coresight_graph(graph))
+			return graph;
+		/* Invalid graph format */
+		break;
+	}
+
+	return NULL;
+}
+
+/*
+ * acpi_coresight_parse_link	- Parse the given Graph connection
+ * of the device and populate the coresight_connection for an output
+ * connection.
+ *
+ * CoreSight Graph specification mandates that the direction of the data
+ * flow must be specified in the link. i.e,
+ *
+ *	SourcePortAddress,	// Integer
+ *	DestinationPortAddress,	// Integer
+ *	DestinationDeviceName,	// Reference to another device
+ *	DirectionOfFlow,	// 1 for output(master), 0 for input(slave)
+ *
+ * Returns the direction of the data flow [ Input(slave) or Output(master) ]
+ * upon success.
+ * Returns an negative error number otherwise.
+ */
+static int acpi_coresight_parse_link(struct acpi_device *adev,
+				     const union acpi_object *link,
+				     struct coresight_connection *conn)
+{
+	int dir;
+	const union acpi_object *fields;
+	struct acpi_device *r_adev;
+	struct device *rdev;
+
+	if (link->type != ACPI_TYPE_PACKAGE ||
+	    link->package.count != 4)
+		return -EINVAL;
+
+	fields = link->package.elements;
+
+	if (fields[0].type != ACPI_TYPE_INTEGER ||
+	    fields[1].type != ACPI_TYPE_INTEGER ||
+	    fields[2].type != ACPI_TYPE_LOCAL_REFERENCE ||
+	    fields[3].type != ACPI_TYPE_INTEGER)
+		return -EINVAL;
+
+	r_adev = acpi_fetch_acpi_dev(fields[2].reference.handle);
+	if (!r_adev)
+		return -ENODEV;
+
+	dir = fields[3].integer.value;
+	if (dir == ACPI_CORESIGHT_LINK_MASTER) {
+		conn->src_port = fields[0].integer.value;
+		conn->dest_port = fields[1].integer.value;
+		rdev = coresight_find_device_by_fwnode(&r_adev->fwnode);
+		if (!rdev)
+			return -EPROBE_DEFER;
+		/*
+		 * Hold the refcount to the target device. This could be
+		 * released via:
+		 * 1) coresight_release_platform_data() if the probe fails or
+		 *    this device is unregistered.
+		 * 2) While removing the target device via
+		 *    coresight_remove_match().
+		 */
+		conn->dest_fwnode = fwnode_handle_get(&r_adev->fwnode);
+	} else if (dir == ACPI_CORESIGHT_LINK_SLAVE) {
+		/*
+		 * We are only interested in the port number
+		 * for the input ports at this component.
+		 * Store the port number in child_port.
+		 */
+		conn->dest_port = fields[0].integer.value;
+	} else {
+		/* Invalid direction */
+		return -EINVAL;
+	}
+
+	return dir;
+}
+
+/*
+ * acpi_coresight_parse_graph	- Parse the _DSD CoreSight graph
+ * connection information and populate the supplied coresight_platform_data
+ * instance.
+ */
+static int acpi_coresight_parse_graph(struct device *dev,
+				      struct acpi_device *adev,
+				      struct coresight_platform_data *pdata)
+{
+	int ret = 0;
+	int i, nlinks;
+	const union acpi_object *graph;
+	struct coresight_connection conn, zero_conn = {};
+	struct coresight_connection *new_conn;
+	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	graph = acpi_get_coresight_graph(adev, &buf);
+	/*
+	 * There are no graph connections, which is fine for some components.
+	 * e.g., ETE
+	 */
+	if (!graph)
+		goto free;
+
+	nlinks = graph->package.elements[2].integer.value;
+	if (!nlinks)
+		goto free;
+
+	for (i = 0; i < nlinks; i++) {
+		const union acpi_object *link = &graph->package.elements[3 + i];
+		int dir;
+
+		conn = zero_conn;
+		dir = acpi_coresight_parse_link(adev, link, &conn);
+		if (dir < 0) {
+			ret = dir;
+			goto free;
+		}
+
+		if (dir == ACPI_CORESIGHT_LINK_MASTER) {
+			new_conn = coresight_add_out_conn(dev, pdata, &conn);
+			if (IS_ERR(new_conn)) {
+				ret = PTR_ERR(new_conn);
+				goto free;
+			}
+		}
+	}
+
+free:
+	/*
+	 * When ACPI fails to alloc a buffer, it will free the buffer
+	 * created via ACPI_ALLOCATE_BUFFER and set to NULL.
+	 * ACPI_FREE can handle NULL pointers, so free it directly.
+	 */
+	ACPI_FREE(buf.pointer);
+	return ret;
+}
+
+/*
+ * acpi_handle_to_logical_cpuid - Map a given acpi_handle to the
+ * logical CPU id of the corresponding CPU device.
+ *
+ * Returns the logical CPU id when found. Otherwise returns >= nr_cpus_id.
+ */
+static int
+acpi_handle_to_logical_cpuid(acpi_handle handle)
+{
+	int i;
+	struct acpi_processor *pr;
+
+	for_each_possible_cpu(i) {
+		pr = per_cpu(processors, i);
+		if (pr && pr->handle == handle)
+			break;
+	}
+
+	return i;
+}
+
+/*
+ * acpi_coresigh_get_cpu - Find the logical CPU id of the CPU associated
+ * with this coresight device. With ACPI bindings, the CoreSight components
+ * are listed as child device of the associated CPU.
+ *
+ * Returns the logical CPU id when found. Otherwise returns 0.
+ */
+static int acpi_coresight_get_cpu(struct device *dev)
+{
+	int cpu;
+	acpi_handle cpu_handle;
+	acpi_status status;
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	if (!adev)
+		return -ENODEV;
+	status = acpi_get_parent(adev->handle, &cpu_handle);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	cpu = acpi_handle_to_logical_cpuid(cpu_handle);
+	if (cpu >= nr_cpu_ids)
+		return -ENODEV;
+	return cpu;
+}
+
+static int
+acpi_get_coresight_platform_data(struct device *dev,
+				 struct coresight_platform_data *pdata)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -EINVAL;
+
+	return acpi_coresight_parse_graph(dev, adev, pdata);
+}
+
+#else
+
+static inline int
+acpi_get_coresight_platform_data(struct device *dev,
+				 struct coresight_platform_data *pdata)
+{
+	return -ENOENT;
+}
+
+static inline int acpi_coresight_get_cpu(struct device *dev)
+{
+	return -ENODEV;
+}
+#endif
+
+int coresight_get_cpu(struct device *dev)
+{
+	if (is_of_node(dev->fwnode))
+		return of_coresight_get_cpu(dev);
+	else if (is_acpi_device_node(dev->fwnode))
+		return acpi_coresight_get_cpu(dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(coresight_get_cpu);
+
+struct coresight_platform_data *
+coresight_get_platform_data(struct device *dev)
+{
+	int ret = -ENOENT;
+	struct coresight_platform_data *pdata = NULL;
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+
+	if (IS_ERR_OR_NULL(fwnode))
+		goto error;
+
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	if (is_of_node(fwnode))
+		ret = of_get_coresight_platform_data(dev, pdata);
+	else if (is_acpi_device_node(fwnode))
+		ret = acpi_get_coresight_platform_data(dev, pdata);
+
+	if (!ret)
+		return pdata;
+error:
+	if (!IS_ERR_OR_NULL(pdata))
+		/* Cleanup the connection information */
+		coresight_release_platform_data(NULL, dev, pdata);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(coresight_get_platform_data);
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
new file mode 100644
index 0000000000..767076e079
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_PRIV_H
+#define _CORESIGHT_PRIV_H
+
+#include <linux/amba/bus.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/coresight.h>
+#include <linux/pm_runtime.h>
+
+/*
+ * Coresight management registers (0xf00-0xfcc)
+ * 0xfa0 - 0xfa4: Management	registers in PFTv1.0
+ *		  Trace		registers in PFTv1.1
+ */
+#define CORESIGHT_ITCTRL	0xf00
+#define CORESIGHT_CLAIMSET	0xfa0
+#define CORESIGHT_CLAIMCLR	0xfa4
+#define CORESIGHT_LAR		0xfb0
+#define CORESIGHT_LSR		0xfb4
+#define CORESIGHT_DEVARCH	0xfbc
+#define CORESIGHT_AUTHSTATUS	0xfb8
+#define CORESIGHT_DEVID		0xfc8
+#define CORESIGHT_DEVTYPE	0xfcc
+
+
+/*
+ * Coresight device CLAIM protocol.
+ * See PSCI - ARM DEN 0022D, Section: 6.8.1 Debug and Trace save and restore.
+ */
+#define CORESIGHT_CLAIM_SELF_HOSTED	BIT(1)
+
+#define TIMEOUT_US		100
+#define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)
+
+#define ETM_MODE_EXCL_KERN	BIT(30)
+#define ETM_MODE_EXCL_USER	BIT(31)
+struct cs_pair_attribute {
+	struct device_attribute attr;
+	u32 lo_off;
+	u32 hi_off;
+};
+
+struct cs_off_attribute {
+	struct device_attribute attr;
+	u32 off;
+};
+
+extern ssize_t coresight_simple_show32(struct device *_dev,
+				     struct device_attribute *attr, char *buf);
+extern ssize_t coresight_simple_show_pair(struct device *_dev,
+				     struct device_attribute *attr, char *buf);
+
+#define coresight_simple_reg32(name, offset)				\
+	(&((struct cs_off_attribute[]) {				\
+	   {								\
+		__ATTR(name, 0444, coresight_simple_show32, NULL),	\
+		offset							\
+	   }								\
+	})[0].attr.attr)
+
+#define coresight_simple_reg64(name, lo_off, hi_off)			\
+	(&((struct cs_pair_attribute[]) {				\
+	   {								\
+		__ATTR(name, 0444, coresight_simple_show_pair, NULL),	\
+		lo_off, hi_off						\
+	   }								\
+	})[0].attr.attr)
+
+extern const u32 coresight_barrier_pkt[4];
+#define CORESIGHT_BARRIER_PKT_SIZE (sizeof(coresight_barrier_pkt))
+
+enum etm_addr_type {
+	ETM_ADDR_TYPE_NONE,
+	ETM_ADDR_TYPE_SINGLE,
+	ETM_ADDR_TYPE_RANGE,
+	ETM_ADDR_TYPE_START,
+	ETM_ADDR_TYPE_STOP,
+};
+
+/**
+ * struct cs_buffer - keep track of a recording session' specifics
+ * @cur:	index of the current buffer
+ * @nr_pages:	max number of pages granted to us
+ * @pid:	PID this cs_buffer belongs to
+ * @offset:	offset within the current buffer
+ * @data_size:	how much we collected in this run
+ * @snapshot:	is this run in snapshot mode
+ * @data_pages:	a handle the ring buffer
+ */
+struct cs_buffers {
+	unsigned int		cur;
+	unsigned int		nr_pages;
+	pid_t			pid;
+	unsigned long		offset;
+	local_t			data_size;
+	bool			snapshot;
+	void			**data_pages;
+};
+
+static inline void coresight_insert_barrier_packet(void *buf)
+{
+	if (buf)
+		memcpy(buf, coresight_barrier_pkt, CORESIGHT_BARRIER_PKT_SIZE);
+}
+
+static inline void CS_LOCK(void __iomem *addr)
+{
+	do {
+		/* Wait for things to settle */
+		mb();
+		writel_relaxed(0x0, addr + CORESIGHT_LAR);
+	} while (0);
+}
+
+static inline void CS_UNLOCK(void __iomem *addr)
+{
+	do {
+		writel_relaxed(CORESIGHT_UNLOCK, addr + CORESIGHT_LAR);
+		/* Make sure everyone has seen this */
+		mb();
+	} while (0);
+}
+
+void coresight_disable_path(struct list_head *path);
+int coresight_enable_path(struct list_head *path, enum cs_mode mode,
+			  void *sink_data);
+struct coresight_device *coresight_get_sink(struct list_head *path);
+struct coresight_device *
+coresight_get_enabled_sink(struct coresight_device *source);
+struct coresight_device *coresight_get_sink_by_id(u32 id);
+struct coresight_device *
+coresight_find_default_sink(struct coresight_device *csdev);
+struct list_head *coresight_build_path(struct coresight_device *csdev,
+				       struct coresight_device *sink);
+void coresight_release_path(struct list_head *path);
+int coresight_add_sysfs_link(struct coresight_sysfs_link *info);
+void coresight_remove_sysfs_link(struct coresight_sysfs_link *info);
+int coresight_create_conns_sysfs_group(struct coresight_device *csdev);
+void coresight_remove_conns_sysfs_group(struct coresight_device *csdev);
+int coresight_make_links(struct coresight_device *orig,
+			 struct coresight_connection *conn,
+			 struct coresight_device *target);
+void coresight_remove_links(struct coresight_device *orig,
+			    struct coresight_connection *conn);
+
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM3X)
+extern int etm_readl_cp14(u32 off, unsigned int *val);
+extern int etm_writel_cp14(u32 off, u32 val);
+#else
+static inline int etm_readl_cp14(u32 off, unsigned int *val) { return 0; }
+static inline int etm_writel_cp14(u32 off, u32 val) { return 0; }
+#endif
+
+struct cti_assoc_op {
+	void (*add)(struct coresight_device *csdev);
+	void (*remove)(struct coresight_device *csdev);
+};
+
+extern void coresight_set_cti_ops(const struct cti_assoc_op *cti_op);
+extern void coresight_remove_cti_ops(void);
+
+/*
+ * Macros and inline functions to handle CoreSight UCI data and driver
+ * private data in AMBA ID table entries, and extract data values.
+ */
+
+/* coresight AMBA ID, no UCI, no driver data: id table entry */
+#define CS_AMBA_ID(pid)			\
+	{				\
+		.id	= pid,		\
+		.mask	= 0x000fffff,	\
+	}
+
+/* coresight AMBA ID, UCI with driver data only: id table entry. */
+#define CS_AMBA_ID_DATA(pid, dval)				\
+	{							\
+		.id	= pid,					\
+		.mask	= 0x000fffff,				\
+		.data	=  (void *)&(struct amba_cs_uci_id)	\
+			{				\
+				.data = (void *)dval,	\
+			}				\
+	}
+
+/* coresight AMBA ID, full UCI structure: id table entry. */
+#define __CS_AMBA_UCI_ID(pid, m, uci_ptr)	\
+	{					\
+		.id	= pid,			\
+		.mask	= m,			\
+		.data	= (void *)uci_ptr	\
+	}
+#define CS_AMBA_UCI_ID(pid, uci)	__CS_AMBA_UCI_ID(pid, 0x000fffff, uci)
+/*
+ * PIDR2[JEDEC], BIT(3) must be 1 (Read As One) to indicate that rest of the
+ * PIDR1, PIDR2 DES_* fields follow JEDEC encoding for the designer. Use that
+ * as a match value for blanket matching all devices in the given CoreSight
+ * device type and architecture.
+ */
+#define PIDR2_JEDEC			BIT(3)
+#define PID_PIDR2_JEDEC			(PIDR2_JEDEC << 16)
+/*
+ * Match all PIDs in a given CoreSight device type and architecture, defined
+ * by the uci.
+ */
+#define CS_AMBA_MATCH_ALL_UCI(uci)					\
+	__CS_AMBA_UCI_ID(PID_PIDR2_JEDEC, PID_PIDR2_JEDEC, uci)
+
+/* extract the data value from a UCI structure given amba_id pointer. */
+static inline void *coresight_get_uci_data(const struct amba_id *id)
+{
+	struct amba_cs_uci_id *uci_id = id->data;
+
+	if (!uci_id)
+		return NULL;
+
+	return uci_id->data;
+}
+
+void coresight_release_platform_data(struct coresight_device *csdev,
+				     struct device *dev,
+				     struct coresight_platform_data *pdata);
+struct coresight_device *
+coresight_find_csdev_by_fwnode(struct fwnode_handle *r_fwnode);
+void coresight_add_helper(struct coresight_device *csdev,
+			  struct coresight_device *helper);
+
+void coresight_set_percpu_sink(int cpu, struct coresight_device *csdev);
+struct coresight_device *coresight_get_percpu_sink(int cpu);
+int coresight_enable_source(struct coresight_device *csdev, enum cs_mode mode,
+			    void *data);
+bool coresight_disable_source(struct coresight_device *csdev, void *data);
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
new file mode 100644
index 0000000000..b6be730349
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2015, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Replicator driver
+ */
+
+#include <linux/acpi.h>
+#include <linux/amba/bus.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+
+#include "coresight-priv.h"
+
+#define REPLICATOR_IDFILTER0		0x000
+#define REPLICATOR_IDFILTER1		0x004
+
+DEFINE_CORESIGHT_DEVLIST(replicator_devs, "replicator");
+
+/**
+ * struct replicator_drvdata - specifics associated to a replicator component
+ * @base:	memory mapped base address for this component. Also indicates
+ *		whether this one is programmable or not.
+ * @atclk:	optional clock for the core parts of the replicator.
+ * @csdev:	component vitals needed by the framework
+ * @spinlock:	serialize enable/disable operations.
+ * @check_idfilter_val: check if the context is lost upon clock removal.
+ */
+struct replicator_drvdata {
+	void __iomem		*base;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+	spinlock_t		spinlock;
+	bool			check_idfilter_val;
+};
+
+static void dynamic_replicator_reset(struct replicator_drvdata *drvdata)
+{
+	struct coresight_device *csdev = drvdata->csdev;
+
+	CS_UNLOCK(drvdata->base);
+
+	if (!coresight_claim_device_unlocked(csdev)) {
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
+		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
+		coresight_disclaim_device_unlocked(csdev);
+	}
+
+	CS_LOCK(drvdata->base);
+}
+
+/*
+ * replicator_reset : Reset the replicator configuration to sane values.
+ */
+static inline void replicator_reset(struct replicator_drvdata *drvdata)
+{
+	if (drvdata->base)
+		dynamic_replicator_reset(drvdata);
+}
+
+static int dynamic_replicator_enable(struct replicator_drvdata *drvdata,
+				     int inport, int outport)
+{
+	int rc = 0;
+	u32 id0val, id1val;
+	struct coresight_device *csdev = drvdata->csdev;
+
+	CS_UNLOCK(drvdata->base);
+
+	id0val = readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0);
+	id1val = readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1);
+
+	/*
+	 * Some replicator designs lose context when AMBA clocks are removed,
+	 * so have a check for this.
+	 */
+	if (drvdata->check_idfilter_val && id0val == 0x0 && id1val == 0x0)
+		id0val = id1val = 0xff;
+
+	if (id0val == 0xff && id1val == 0xff)
+		rc = coresight_claim_device_unlocked(csdev);
+
+	if (!rc) {
+		switch (outport) {
+		case 0:
+			id0val = 0x0;
+			break;
+		case 1:
+			id1val = 0x0;
+			break;
+		default:
+			WARN_ON(1);
+			rc = -EINVAL;
+		}
+	}
+
+	/* Ensure that the outport is enabled. */
+	if (!rc) {
+		writel_relaxed(id0val, drvdata->base + REPLICATOR_IDFILTER0);
+		writel_relaxed(id1val, drvdata->base + REPLICATOR_IDFILTER1);
+	}
+
+	CS_LOCK(drvdata->base);
+
+	return rc;
+}
+
+static int replicator_enable(struct coresight_device *csdev,
+			     struct coresight_connection *in,
+			     struct coresight_connection *out)
+{
+	int rc = 0;
+	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	unsigned long flags;
+	bool first_enable = false;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (atomic_read(&out->src_refcnt) == 0) {
+		if (drvdata->base)
+			rc = dynamic_replicator_enable(drvdata, in->dest_port,
+						       out->src_port);
+		if (!rc)
+			first_enable = true;
+	}
+	if (!rc)
+		atomic_inc(&out->src_refcnt);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	if (first_enable)
+		dev_dbg(&csdev->dev, "REPLICATOR enabled\n");
+	return rc;
+}
+
+static void dynamic_replicator_disable(struct replicator_drvdata *drvdata,
+				       int inport, int outport)
+{
+	u32 reg;
+	struct coresight_device *csdev = drvdata->csdev;
+
+	switch (outport) {
+	case 0:
+		reg = REPLICATOR_IDFILTER0;
+		break;
+	case 1:
+		reg = REPLICATOR_IDFILTER1;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	CS_UNLOCK(drvdata->base);
+
+	/* disable the flow of ATB data through port */
+	writel_relaxed(0xff, drvdata->base + reg);
+
+	if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) &&
+	    (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff))
+		coresight_disclaim_device_unlocked(csdev);
+	CS_LOCK(drvdata->base);
+}
+
+static void replicator_disable(struct coresight_device *csdev,
+			       struct coresight_connection *in,
+			       struct coresight_connection *out)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	unsigned long flags;
+	bool last_disable = false;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (atomic_dec_return(&out->src_refcnt) == 0) {
+		if (drvdata->base)
+			dynamic_replicator_disable(drvdata, in->dest_port,
+						   out->src_port);
+		last_disable = true;
+	}
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	if (last_disable)
+		dev_dbg(&csdev->dev, "REPLICATOR disabled\n");
+}
+
+static const struct coresight_ops_link replicator_link_ops = {
+	.enable		= replicator_enable,
+	.disable	= replicator_disable,
+};
+
+static const struct coresight_ops replicator_cs_ops = {
+	.link_ops	= &replicator_link_ops,
+};
+
+static struct attribute *replicator_mgmt_attrs[] = {
+	coresight_simple_reg32(idfilter0, REPLICATOR_IDFILTER0),
+	coresight_simple_reg32(idfilter1, REPLICATOR_IDFILTER1),
+	NULL,
+};
+
+static const struct attribute_group replicator_mgmt_group = {
+	.attrs = replicator_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *replicator_groups[] = {
+	&replicator_mgmt_group,
+	NULL,
+};
+
+static int replicator_probe(struct device *dev, struct resource *res)
+{
+	int ret = 0;
+	struct coresight_platform_data *pdata = NULL;
+	struct replicator_drvdata *drvdata;
+	struct coresight_desc desc = { 0 };
+	void __iomem *base;
+
+	if (is_of_node(dev_fwnode(dev)) &&
+	    of_device_is_compatible(dev->of_node, "arm,coresight-replicator"))
+		dev_warn_once(dev,
+			      "Uses OBSOLETE CoreSight replicator binding\n");
+
+	desc.name = coresight_alloc_device_name(&replicator_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->atclk = devm_clk_get(dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Map the device base for dynamic-replicator, which has been
+	 * validated by AMBA core
+	 */
+	if (res) {
+		base = devm_ioremap_resource(dev, res);
+		if (IS_ERR(base)) {
+			ret = PTR_ERR(base);
+			goto out_disable_clk;
+		}
+		drvdata->base = base;
+		desc.groups = replicator_groups;
+		desc.access = CSDEV_ACCESS_IOMEM(base);
+	}
+
+	if (fwnode_property_present(dev_fwnode(dev),
+				    "qcom,replicator-loses-context"))
+		drvdata->check_idfilter_val = true;
+
+	dev_set_drvdata(dev, drvdata);
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto out_disable_clk;
+	}
+	dev->platform_data = pdata;
+
+	spin_lock_init(&drvdata->spinlock);
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
+	desc.ops = &replicator_cs_ops;
+	desc.pdata = dev->platform_data;
+	desc.dev = dev;
+
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto out_disable_clk;
+	}
+
+	replicator_reset(drvdata);
+	pm_runtime_put(dev);
+
+out_disable_clk:
+	if (ret && !IS_ERR_OR_NULL(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+	return ret;
+}
+
+static int replicator_remove(struct device *dev)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(dev);
+
+	coresight_unregister(drvdata->csdev);
+	return 0;
+}
+
+static int static_replicator_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	/* Static replicators do not have programming base */
+	ret = replicator_probe(&pdev->dev, NULL);
+
+	if (ret) {
+		pm_runtime_put_noidle(&pdev->dev);
+		pm_runtime_disable(&pdev->dev);
+	}
+
+	return ret;
+}
+
+static int static_replicator_remove(struct platform_device *pdev)
+{
+	replicator_remove(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int replicator_runtime_suspend(struct device *dev)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int replicator_runtime_resume(struct device *dev)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops replicator_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(replicator_runtime_suspend,
+			   replicator_runtime_resume, NULL)
+};
+
+static const struct of_device_id static_replicator_match[] = {
+	{.compatible = "arm,coresight-replicator"},
+	{.compatible = "arm,coresight-static-replicator"},
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, static_replicator_match);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id static_replicator_acpi_ids[] = {
+	{"ARMHC985", 0}, /* ARM CoreSight Static Replicator */
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, static_replicator_acpi_ids);
+#endif
+
+static struct platform_driver static_replicator_driver = {
+	.probe          = static_replicator_probe,
+	.remove         = static_replicator_remove,
+	.driver         = {
+		.name   = "coresight-static-replicator",
+		/* THIS_MODULE is taken care of by platform_driver_register() */
+		.of_match_table = of_match_ptr(static_replicator_match),
+		.acpi_match_table = ACPI_PTR(static_replicator_acpi_ids),
+		.pm	= &replicator_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+};
+
+static int dynamic_replicator_probe(struct amba_device *adev,
+				    const struct amba_id *id)
+{
+	return replicator_probe(&adev->dev, &adev->res);
+}
+
+static void dynamic_replicator_remove(struct amba_device *adev)
+{
+	replicator_remove(&adev->dev);
+}
+
+static const struct amba_id dynamic_replicator_ids[] = {
+	CS_AMBA_ID(0x000bb909),
+	CS_AMBA_ID(0x000bb9ec),		/* Coresight SoC-600 */
+	{},
+};
+
+MODULE_DEVICE_TABLE(amba, dynamic_replicator_ids);
+
+static struct amba_driver dynamic_replicator_driver = {
+	.drv = {
+		.name	= "coresight-dynamic-replicator",
+		.pm	= &replicator_dev_pm_ops,
+		.owner	= THIS_MODULE,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= dynamic_replicator_probe,
+	.remove         = dynamic_replicator_remove,
+	.id_table	= dynamic_replicator_ids,
+};
+
+static int __init replicator_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&static_replicator_driver);
+	if (ret) {
+		pr_info("Error registering platform driver\n");
+		return ret;
+	}
+
+	ret = amba_driver_register(&dynamic_replicator_driver);
+	if (ret) {
+		pr_info("Error registering amba driver\n");
+		platform_driver_unregister(&static_replicator_driver);
+	}
+
+	return ret;
+}
+
+static void __exit replicator_exit(void)
+{
+	platform_driver_unregister(&static_replicator_driver);
+	amba_driver_unregister(&dynamic_replicator_driver);
+}
+
+module_init(replicator_init);
+module_exit(replicator_exit);
+
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_AUTHOR("Mathieu Poirier <mathieu.poirier@linaro.org>");
+MODULE_DESCRIPTION("Arm CoreSight Replicator Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-self-hosted-trace.h b/drivers/hwtracing/coresight/coresight-self-hosted-trace.h
new file mode 100644
index 0000000000..53840a2c41
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-self-hosted-trace.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Arm v8 Self-Hosted trace support.
+ *
+ * Copyright (C) 2021 ARM Ltd.
+ */
+
+#ifndef __CORESIGHT_SELF_HOSTED_TRACE_H
+#define __CORESIGHT_SELF_HOSTED_TRACE_H
+
+#include <asm/sysreg.h>
+
+static inline u64 read_trfcr(void)
+{
+	return read_sysreg_s(SYS_TRFCR_EL1);
+}
+
+static inline void write_trfcr(u64 val)
+{
+	write_sysreg_s(val, SYS_TRFCR_EL1);
+	isb();
+}
+
+static inline u64 cpu_prohibit_trace(void)
+{
+	u64 trfcr = read_trfcr();
+
+	/* Prohibit tracing at EL0 & the kernel EL */
+	write_trfcr(trfcr & ~(TRFCR_ELx_ExTRE | TRFCR_ELx_E0TRE));
+	/* Return the original value of the TRFCR */
+	return trfcr;
+}
+#endif /*  __CORESIGHT_SELF_HOSTED_TRACE_H */
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
new file mode 100644
index 0000000000..a1c27c901a
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -0,0 +1,974 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight System Trace Macrocell driver
+ *
+ * Initial implementation by Pratik Patel
+ * (C) 2014-2015 Pratik Patel <pratikp@codeaurora.org>
+ *
+ * Serious refactoring, code cleanup and upgrading to the Coresight upstream
+ * framework by Mathieu Poirier
+ * (C) 2015-2016 Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * Guaranteed timing and support for various packet type coming from the
+ * generic STM API by Chunyan Zhang
+ * (C) 2015-2016 Chunyan Zhang <zhang.chunyan@linaro.org>
+ */
+#include <asm/local.h>
+#include <linux/acpi.h>
+#include <linux/amba/bus.h>
+#include <linux/bitmap.h>
+#include <linux/clk.h>
+#include <linux/coresight.h>
+#include <linux/coresight-stm.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/of_address.h>
+#include <linux/perf_event.h>
+#include <linux/pm_runtime.h>
+#include <linux/stm.h>
+
+#include "coresight-priv.h"
+#include "coresight-trace-id.h"
+
+#define STMDMASTARTR			0xc04
+#define STMDMASTOPR			0xc08
+#define STMDMASTATR			0xc0c
+#define STMDMACTLR			0xc10
+#define STMDMAIDR			0xcfc
+#define STMHEER				0xd00
+#define STMHETER			0xd20
+#define STMHEBSR			0xd60
+#define STMHEMCR			0xd64
+#define STMHEMASTR			0xdf4
+#define STMHEFEAT1R			0xdf8
+#define STMHEIDR			0xdfc
+#define STMSPER				0xe00
+#define STMSPTER			0xe20
+#define STMPRIVMASKR			0xe40
+#define STMSPSCR			0xe60
+#define STMSPMSCR			0xe64
+#define STMSPOVERRIDER			0xe68
+#define STMSPMOVERRIDER			0xe6c
+#define STMSPTRIGCSR			0xe70
+#define STMTCSR				0xe80
+#define STMTSSTIMR			0xe84
+#define STMTSFREQR			0xe8c
+#define STMSYNCR			0xe90
+#define STMAUXCR			0xe94
+#define STMSPFEAT1R			0xea0
+#define STMSPFEAT2R			0xea4
+#define STMSPFEAT3R			0xea8
+#define STMITTRIGGER			0xee8
+#define STMITATBDATA0			0xeec
+#define STMITATBCTR2			0xef0
+#define STMITATBID			0xef4
+#define STMITATBCTR0			0xef8
+
+#define STM_32_CHANNEL			32
+#define BYTES_PER_CHANNEL		256
+#define STM_TRACE_BUF_SIZE		4096
+#define STM_SW_MASTER_END		127
+
+/* Register bit definition */
+#define STMTCSR_BUSY_BIT		23
+/* Reserve the first 10 channels for kernel usage */
+#define STM_CHANNEL_OFFSET		0
+
+enum stm_pkt_type {
+	STM_PKT_TYPE_DATA	= 0x98,
+	STM_PKT_TYPE_FLAG	= 0xE8,
+	STM_PKT_TYPE_TRIG	= 0xF8,
+};
+
+#define stm_channel_addr(drvdata, ch)	(drvdata->chs.base +	\
+					(ch * BYTES_PER_CHANNEL))
+#define stm_channel_off(type, opts)	(type & ~opts)
+
+static int boot_nr_channel;
+
+/*
+ * Not really modular but using module_param is the easiest way to
+ * remain consistent with existing use cases for now.
+ */
+module_param_named(
+	boot_nr_channel, boot_nr_channel, int, S_IRUGO
+);
+
+/*
+ * struct channel_space - central management entity for extended ports
+ * @base:		memory mapped base address where channels start.
+ * @phys:		physical base address of channel region.
+ * @guaraneed:		is the channel delivery guaranteed.
+ */
+struct channel_space {
+	void __iomem		*base;
+	phys_addr_t		phys;
+	unsigned long		*guaranteed;
+};
+
+DEFINE_CORESIGHT_DEVLIST(stm_devs, "stm");
+
+/**
+ * struct stm_drvdata - specifics associated to an STM component
+ * @base:		memory mapped base address for this component.
+ * @atclk:		optional clock for the core parts of the STM.
+ * @csdev:		component vitals needed by the framework.
+ * @spinlock:		only one at a time pls.
+ * @chs:		the channels accociated to this STM.
+ * @stm:		structure associated to the generic STM interface.
+ * @mode:		this tracer's mode (enum cs_mode), i.e sysFS, or disabled.
+ * @traceid:		value of the current ID for this component.
+ * @write_bytes:	Maximus bytes this STM can write at a time.
+ * @stmsper:		settings for register STMSPER.
+ * @stmspscr:		settings for register STMSPSCR.
+ * @numsp:		the total number of stimulus port support by this STM.
+ * @stmheer:		settings for register STMHEER.
+ * @stmheter:		settings for register STMHETER.
+ * @stmhebsr:		settings for register STMHEBSR.
+ */
+struct stm_drvdata {
+	void __iomem		*base;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+	spinlock_t		spinlock;
+	struct channel_space	chs;
+	struct stm_data		stm;
+	local_t			mode;
+	u8			traceid;
+	u32			write_bytes;
+	u32			stmsper;
+	u32			stmspscr;
+	u32			numsp;
+	u32			stmheer;
+	u32			stmheter;
+	u32			stmhebsr;
+};
+
+static void stm_hwevent_enable_hw(struct stm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(drvdata->stmhebsr, drvdata->base + STMHEBSR);
+	writel_relaxed(drvdata->stmheter, drvdata->base + STMHETER);
+	writel_relaxed(drvdata->stmheer, drvdata->base + STMHEER);
+	writel_relaxed(0x01 |	/* Enable HW event tracing */
+		       0x04,	/* Error detection on event tracing */
+		       drvdata->base + STMHEMCR);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void stm_port_enable_hw(struct stm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+	/* ATB trigger enable on direct writes to TRIG locations */
+	writel_relaxed(0x10,
+		       drvdata->base + STMSPTRIGCSR);
+	writel_relaxed(drvdata->stmspscr, drvdata->base + STMSPSCR);
+	writel_relaxed(drvdata->stmsper, drvdata->base + STMSPER);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void stm_enable_hw(struct stm_drvdata *drvdata)
+{
+	if (drvdata->stmheer)
+		stm_hwevent_enable_hw(drvdata);
+
+	stm_port_enable_hw(drvdata);
+
+	CS_UNLOCK(drvdata->base);
+
+	/* 4096 byte between synchronisation packets */
+	writel_relaxed(0xFFF, drvdata->base + STMSYNCR);
+	writel_relaxed((drvdata->traceid << 16 | /* trace id */
+			0x02 |			 /* timestamp enable */
+			0x01),			 /* global STM enable */
+			drvdata->base + STMTCSR);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int stm_enable(struct coresight_device *csdev, struct perf_event *event,
+		      enum cs_mode mode)
+{
+	u32 val;
+	struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (mode != CS_MODE_SYSFS)
+		return -EINVAL;
+
+	val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode);
+
+	/* Someone is already using the tracer */
+	if (val)
+		return -EBUSY;
+
+	pm_runtime_get_sync(csdev->dev.parent);
+
+	spin_lock(&drvdata->spinlock);
+	stm_enable_hw(drvdata);
+	spin_unlock(&drvdata->spinlock);
+
+	dev_dbg(&csdev->dev, "STM tracing enabled\n");
+	return 0;
+}
+
+static void stm_hwevent_disable_hw(struct stm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(0x0, drvdata->base + STMHEMCR);
+	writel_relaxed(0x0, drvdata->base + STMHEER);
+	writel_relaxed(0x0, drvdata->base + STMHETER);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void stm_port_disable_hw(struct stm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(0x0, drvdata->base + STMSPER);
+	writel_relaxed(0x0, drvdata->base + STMSPTRIGCSR);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void stm_disable_hw(struct stm_drvdata *drvdata)
+{
+	u32 val;
+
+	CS_UNLOCK(drvdata->base);
+
+	val = readl_relaxed(drvdata->base + STMTCSR);
+	val &= ~0x1; /* clear global STM enable [0] */
+	writel_relaxed(val, drvdata->base + STMTCSR);
+
+	CS_LOCK(drvdata->base);
+
+	stm_port_disable_hw(drvdata);
+	if (drvdata->stmheer)
+		stm_hwevent_disable_hw(drvdata);
+}
+
+static void stm_disable(struct coresight_device *csdev,
+			struct perf_event *event)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct csdev_access *csa = &csdev->access;
+
+	/*
+	 * For as long as the tracer isn't disabled another entity can't
+	 * change its status.  As such we can read the status here without
+	 * fearing it will change under us.
+	 */
+	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
+		spin_lock(&drvdata->spinlock);
+		stm_disable_hw(drvdata);
+		spin_unlock(&drvdata->spinlock);
+
+		/* Wait until the engine has completely stopped */
+		coresight_timeout(csa, STMTCSR, STMTCSR_BUSY_BIT, 0);
+
+		pm_runtime_put(csdev->dev.parent);
+
+		local_set(&drvdata->mode, CS_MODE_DISABLED);
+		dev_dbg(&csdev->dev, "STM tracing disabled\n");
+	}
+}
+
+static const struct coresight_ops_source stm_source_ops = {
+	.enable		= stm_enable,
+	.disable	= stm_disable,
+};
+
+static const struct coresight_ops stm_cs_ops = {
+	.source_ops	= &stm_source_ops,
+};
+
+static inline bool stm_addr_unaligned(const void *addr, u8 write_bytes)
+{
+	return ((unsigned long)addr & (write_bytes - 1));
+}
+
+static void stm_send(void __iomem *addr, const void *data,
+		     u32 size, u8 write_bytes)
+{
+	u8 paload[8];
+
+	if (stm_addr_unaligned(data, write_bytes)) {
+		memcpy(paload, data, size);
+		data = paload;
+	}
+
+	/* now we are 64bit/32bit aligned */
+	switch (size) {
+#ifdef CONFIG_64BIT
+	case 8:
+		writeq_relaxed(*(u64 *)data, addr);
+		break;
+#endif
+	case 4:
+		writel_relaxed(*(u32 *)data, addr);
+		break;
+	case 2:
+		writew_relaxed(*(u16 *)data, addr);
+		break;
+	case 1:
+		writeb_relaxed(*(u8 *)data, addr);
+		break;
+	default:
+		break;
+	}
+}
+
+static int stm_generic_link(struct stm_data *stm_data,
+			    unsigned int master,  unsigned int channel)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	if (!drvdata || !drvdata->csdev)
+		return -EINVAL;
+
+	return coresight_enable(drvdata->csdev);
+}
+
+static void stm_generic_unlink(struct stm_data *stm_data,
+			       unsigned int master,  unsigned int channel)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	if (!drvdata || !drvdata->csdev)
+		return;
+
+	coresight_disable(drvdata->csdev);
+}
+
+static phys_addr_t
+stm_mmio_addr(struct stm_data *stm_data, unsigned int master,
+	      unsigned int channel, unsigned int nr_chans)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	phys_addr_t addr;
+
+	addr = drvdata->chs.phys + channel * BYTES_PER_CHANNEL;
+
+	if (offset_in_page(addr) ||
+	    offset_in_page(nr_chans * BYTES_PER_CHANNEL))
+		return 0;
+
+	return addr;
+}
+
+static long stm_generic_set_options(struct stm_data *stm_data,
+				    unsigned int master,
+				    unsigned int channel,
+				    unsigned int nr_chans,
+				    unsigned long options)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	if (!(drvdata && local_read(&drvdata->mode)))
+		return -EINVAL;
+
+	if (channel >= drvdata->numsp)
+		return -EINVAL;
+
+	switch (options) {
+	case STM_OPTION_GUARANTEED:
+		set_bit(channel, drvdata->chs.guaranteed);
+		break;
+
+	case STM_OPTION_INVARIANT:
+		clear_bit(channel, drvdata->chs.guaranteed);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static ssize_t notrace stm_generic_packet(struct stm_data *stm_data,
+				  unsigned int master,
+				  unsigned int channel,
+				  unsigned int packet,
+				  unsigned int flags,
+				  unsigned int size,
+				  const unsigned char *payload)
+{
+	void __iomem *ch_addr;
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	unsigned int stm_flags;
+
+	if (!(drvdata && local_read(&drvdata->mode)))
+		return -EACCES;
+
+	if (channel >= drvdata->numsp)
+		return -EINVAL;
+
+	ch_addr = stm_channel_addr(drvdata, channel);
+
+	stm_flags = (flags & STP_PACKET_TIMESTAMPED) ?
+			STM_FLAG_TIMESTAMPED : 0;
+	stm_flags |= test_bit(channel, drvdata->chs.guaranteed) ?
+			   STM_FLAG_GUARANTEED : 0;
+
+	if (size > drvdata->write_bytes)
+		size = drvdata->write_bytes;
+	else
+		size = rounddown_pow_of_two(size);
+
+	switch (packet) {
+	case STP_PACKET_FLAG:
+		ch_addr += stm_channel_off(STM_PKT_TYPE_FLAG, stm_flags);
+
+		/*
+		 * The generic STM core sets a size of '0' on flag packets.
+		 * As such send a flag packet of size '1' and tell the
+		 * core we did so.
+		 */
+		stm_send(ch_addr, payload, 1, drvdata->write_bytes);
+		size = 1;
+		break;
+
+	case STP_PACKET_DATA:
+		stm_flags |= (flags & STP_PACKET_MARKED) ? STM_FLAG_MARKED : 0;
+		ch_addr += stm_channel_off(STM_PKT_TYPE_DATA, stm_flags);
+		stm_send(ch_addr, payload, size,
+				drvdata->write_bytes);
+		break;
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	return size;
+}
+
+static ssize_t hwevent_enable_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->stmheer;
+
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t hwevent_enable_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+	int ret = 0;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return -EINVAL;
+
+	drvdata->stmheer = val;
+	/* HW event enable and trigger go hand in hand */
+	drvdata->stmheter = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(hwevent_enable);
+
+static ssize_t hwevent_select_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->stmhebsr;
+
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t hwevent_select_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+	int ret = 0;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return -EINVAL;
+
+	drvdata->stmhebsr = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(hwevent_select);
+
+static ssize_t port_select_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+
+	if (!local_read(&drvdata->mode)) {
+		val = drvdata->stmspscr;
+	} else {
+		spin_lock(&drvdata->spinlock);
+		val = readl_relaxed(drvdata->base + STMSPSCR);
+		spin_unlock(&drvdata->spinlock);
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t port_select_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val, stmsper;
+	int ret = 0;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->stmspscr = val;
+
+	if (local_read(&drvdata->mode)) {
+		CS_UNLOCK(drvdata->base);
+		/* Process as per ARM's TRM recommendation */
+		stmsper = readl_relaxed(drvdata->base + STMSPER);
+		writel_relaxed(0x0, drvdata->base + STMSPER);
+		writel_relaxed(drvdata->stmspscr, drvdata->base + STMSPSCR);
+		writel_relaxed(stmsper, drvdata->base + STMSPER);
+		CS_LOCK(drvdata->base);
+	}
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(port_select);
+
+static ssize_t port_enable_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+
+	if (!local_read(&drvdata->mode)) {
+		val = drvdata->stmsper;
+	} else {
+		spin_lock(&drvdata->spinlock);
+		val = readl_relaxed(drvdata->base + STMSPER);
+		spin_unlock(&drvdata->spinlock);
+	}
+
+	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
+}
+
+static ssize_t port_enable_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val;
+	int ret = 0;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->stmsper = val;
+
+	if (local_read(&drvdata->mode)) {
+		CS_UNLOCK(drvdata->base);
+		writel_relaxed(drvdata->stmsper, drvdata->base + STMSPER);
+		CS_LOCK(drvdata->base);
+	}
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(port_enable);
+
+static ssize_t traceid_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->traceid;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(traceid);
+
+static struct attribute *coresight_stm_attrs[] = {
+	&dev_attr_hwevent_enable.attr,
+	&dev_attr_hwevent_select.attr,
+	&dev_attr_port_enable.attr,
+	&dev_attr_port_select.attr,
+	&dev_attr_traceid.attr,
+	NULL,
+};
+
+static struct attribute *coresight_stm_mgmt_attrs[] = {
+	coresight_simple_reg32(tcsr, STMTCSR),
+	coresight_simple_reg32(tsfreqr, STMTSFREQR),
+	coresight_simple_reg32(syncr, STMSYNCR),
+	coresight_simple_reg32(sper, STMSPER),
+	coresight_simple_reg32(spter, STMSPTER),
+	coresight_simple_reg32(privmaskr, STMPRIVMASKR),
+	coresight_simple_reg32(spscr, STMSPSCR),
+	coresight_simple_reg32(spmscr, STMSPMSCR),
+	coresight_simple_reg32(spfeat1r, STMSPFEAT1R),
+	coresight_simple_reg32(spfeat2r, STMSPFEAT2R),
+	coresight_simple_reg32(spfeat3r, STMSPFEAT3R),
+	coresight_simple_reg32(devid, CORESIGHT_DEVID),
+	NULL,
+};
+
+static const struct attribute_group coresight_stm_group = {
+	.attrs = coresight_stm_attrs,
+};
+
+static const struct attribute_group coresight_stm_mgmt_group = {
+	.attrs = coresight_stm_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *coresight_stm_groups[] = {
+	&coresight_stm_group,
+	&coresight_stm_mgmt_group,
+	NULL,
+};
+
+#ifdef CONFIG_OF
+static int of_stm_get_stimulus_area(struct device *dev, struct resource *res)
+{
+	const char *name = NULL;
+	int index = 0, found = 0;
+	struct device_node *np = dev->of_node;
+
+	while (!of_property_read_string_index(np, "reg-names", index, &name)) {
+		if (strcmp("stm-stimulus-base", name)) {
+			index++;
+			continue;
+		}
+
+		/* We have a match and @index is where it's at */
+		found = 1;
+		break;
+	}
+
+	if (!found)
+		return -EINVAL;
+
+	return of_address_to_resource(np, index, res);
+}
+#else
+static inline int of_stm_get_stimulus_area(struct device *dev,
+					   struct resource *res)
+{
+	return -ENOENT;
+}
+#endif
+
+#ifdef CONFIG_ACPI
+static int acpi_stm_get_stimulus_area(struct device *dev, struct resource *res)
+{
+	int rc;
+	bool found_base = false;
+	struct resource_entry *rent;
+	LIST_HEAD(res_list);
+
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	rc = acpi_dev_get_resources(adev, &res_list, NULL, NULL);
+	if (rc < 0)
+		return rc;
+
+	/*
+	 * The stimulus base for STM device must be listed as the second memory
+	 * resource, followed by the programming base address as described in
+	 * "Section 2.3 Resources" in ACPI for CoreSightTM 1.0 Platform Design
+	 * document (DEN0067).
+	 */
+	rc = -ENOENT;
+	list_for_each_entry(rent, &res_list, node) {
+		if (resource_type(rent->res) != IORESOURCE_MEM)
+			continue;
+		if (found_base) {
+			*res = *rent->res;
+			rc = 0;
+			break;
+		}
+
+		found_base = true;
+	}
+
+	acpi_dev_free_resource_list(&res_list);
+	return rc;
+}
+#else
+static inline int acpi_stm_get_stimulus_area(struct device *dev,
+					     struct resource *res)
+{
+	return -ENOENT;
+}
+#endif
+
+static int stm_get_stimulus_area(struct device *dev, struct resource *res)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+
+	if (is_of_node(fwnode))
+		return of_stm_get_stimulus_area(dev, res);
+	else if (is_acpi_node(fwnode))
+		return acpi_stm_get_stimulus_area(dev, res);
+	return -ENOENT;
+}
+
+static u32 stm_fundamental_data_size(struct stm_drvdata *drvdata)
+{
+	u32 stmspfeat2r;
+
+	if (!IS_ENABLED(CONFIG_64BIT))
+		return 4;
+
+	stmspfeat2r = readl_relaxed(drvdata->base + STMSPFEAT2R);
+
+	/*
+	 * bit[15:12] represents the fundamental data size
+	 * 0 - 32-bit data
+	 * 1 - 64-bit data
+	 */
+	return BMVAL(stmspfeat2r, 12, 15) ? 8 : 4;
+}
+
+static u32 stm_num_stimulus_port(struct stm_drvdata *drvdata)
+{
+	u32 numsp;
+
+	numsp = readl_relaxed(drvdata->base + CORESIGHT_DEVID);
+	/*
+	 * NUMPS in STMDEVID is 17 bit long and if equal to 0x0,
+	 * 32 stimulus ports are supported.
+	 */
+	numsp &= 0x1ffff;
+	if (!numsp)
+		numsp = STM_32_CHANNEL;
+	return numsp;
+}
+
+static void stm_init_default_data(struct stm_drvdata *drvdata)
+{
+	/* Don't use port selection */
+	drvdata->stmspscr = 0x0;
+	/*
+	 * Enable all channel regardless of their number.  When port
+	 * selection isn't used (see above) STMSPER applies to all
+	 * 32 channel group available, hence setting all 32 bits to 1
+	 */
+	drvdata->stmsper = ~0x0;
+
+	/* Set invariant transaction timing on all channels */
+	bitmap_clear(drvdata->chs.guaranteed, 0, drvdata->numsp);
+}
+
+static void stm_init_generic_data(struct stm_drvdata *drvdata,
+				  const char *name)
+{
+	drvdata->stm.name = name;
+
+	/*
+	 * MasterIDs are assigned at HW design phase. As such the core is
+	 * using a single master for interaction with this device.
+	 */
+	drvdata->stm.sw_start = 1;
+	drvdata->stm.sw_end = 1;
+	drvdata->stm.hw_override = true;
+	drvdata->stm.sw_nchannels = drvdata->numsp;
+	drvdata->stm.sw_mmiosz = BYTES_PER_CHANNEL;
+	drvdata->stm.packet = stm_generic_packet;
+	drvdata->stm.mmio_addr = stm_mmio_addr;
+	drvdata->stm.link = stm_generic_link;
+	drvdata->stm.unlink = stm_generic_unlink;
+	drvdata->stm.set_options = stm_generic_set_options;
+}
+
+static int stm_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret, trace_id;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct stm_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct resource ch_res;
+	struct coresight_desc desc = { 0 };
+
+	desc.name = coresight_alloc_device_name(&stm_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+	dev_set_drvdata(dev, drvdata);
+
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
+
+	ret = stm_get_stimulus_area(dev, &ch_res);
+	if (ret)
+		return ret;
+	drvdata->chs.phys = ch_res.start;
+
+	base = devm_ioremap_resource(dev, &ch_res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	drvdata->chs.base = base;
+
+	drvdata->write_bytes = stm_fundamental_data_size(drvdata);
+
+	if (boot_nr_channel)
+		drvdata->numsp = boot_nr_channel;
+	else
+		drvdata->numsp = stm_num_stimulus_port(drvdata);
+
+	drvdata->chs.guaranteed = devm_bitmap_zalloc(dev, drvdata->numsp,
+						     GFP_KERNEL);
+	if (!drvdata->chs.guaranteed)
+		return -ENOMEM;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	stm_init_default_data(drvdata);
+	stm_init_generic_data(drvdata, desc.name);
+
+	if (stm_register_device(dev, &drvdata->stm, THIS_MODULE)) {
+		dev_info(dev,
+			 "%s : stm_register_device failed, probing deferred\n",
+			 desc.name);
+		return -EPROBE_DEFER;
+	}
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto stm_unregister;
+	}
+	adev->dev.platform_data = pdata;
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
+	desc.ops = &stm_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_stm_groups;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto stm_unregister;
+	}
+
+	trace_id = coresight_trace_id_get_system_id();
+	if (trace_id < 0) {
+		ret = trace_id;
+		goto cs_unregister;
+	}
+	drvdata->traceid = (u8)trace_id;
+
+	pm_runtime_put(&adev->dev);
+
+	dev_info(&drvdata->csdev->dev, "%s initialized\n",
+		 (char *)coresight_get_uci_data(id));
+	return 0;
+
+cs_unregister:
+	coresight_unregister(drvdata->csdev);
+
+stm_unregister:
+	stm_unregister_device(&drvdata->stm);
+	return ret;
+}
+
+static void stm_remove(struct amba_device *adev)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	coresight_trace_id_put_system_id(drvdata->traceid);
+	coresight_unregister(drvdata->csdev);
+
+	stm_unregister_device(&drvdata->stm);
+}
+
+#ifdef CONFIG_PM
+static int stm_runtime_suspend(struct device *dev)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int stm_runtime_resume(struct device *dev)
+{
+	struct stm_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops stm_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(stm_runtime_suspend, stm_runtime_resume, NULL)
+};
+
+static const struct amba_id stm_ids[] = {
+	CS_AMBA_ID_DATA(0x000bb962, "STM32"),
+	CS_AMBA_ID_DATA(0x000bb963, "STM500"),
+	{ 0, 0},
+};
+
+MODULE_DEVICE_TABLE(amba, stm_ids);
+
+static struct amba_driver stm_driver = {
+	.drv = {
+		.name   = "coresight-stm",
+		.owner	= THIS_MODULE,
+		.pm	= &stm_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe          = stm_probe,
+	.remove         = stm_remove,
+	.id_table	= stm_ids,
+};
+
+module_amba_driver(stm_driver);
+
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_DESCRIPTION("Arm CoreSight System Trace Macrocell driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-syscfg-configfs.c b/drivers/hwtracing/coresight/coresight-syscfg-configfs.c
new file mode 100644
index 0000000000..433ede94dd
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-syscfg-configfs.c
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/configfs.h>
+
+#include "coresight-config.h"
+#include "coresight-syscfg-configfs.h"
+
+/* create a default ci_type. */
+static inline struct config_item_type *cscfg_create_ci_type(void)
+{
+	struct config_item_type *ci_type;
+
+	ci_type = devm_kzalloc(cscfg_device(), sizeof(*ci_type), GFP_KERNEL);
+	if (ci_type)
+		ci_type->ct_owner = THIS_MODULE;
+
+	return ci_type;
+}
+
+/* configurations sub-group */
+
+/* attributes for the config view group */
+static ssize_t cscfg_cfg_description_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+							 struct cscfg_fs_config, group);
+
+	return scnprintf(page, PAGE_SIZE, "%s", fs_config->config_desc->description);
+}
+CONFIGFS_ATTR_RO(cscfg_cfg_, description);
+
+static ssize_t cscfg_cfg_feature_refs_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+							 struct cscfg_fs_config, group);
+	const struct cscfg_config_desc *config_desc = fs_config->config_desc;
+	ssize_t ch_used = 0;
+	int i;
+
+	for (i = 0; i < config_desc->nr_feat_refs; i++)
+		ch_used += scnprintf(page + ch_used, PAGE_SIZE - ch_used,
+				     "%s\n", config_desc->feat_ref_names[i]);
+	return ch_used;
+}
+CONFIGFS_ATTR_RO(cscfg_cfg_, feature_refs);
+
+/* list preset values in order of features and params */
+static ssize_t cscfg_cfg_values_show(struct config_item *item, char *page)
+{
+	const struct cscfg_feature_desc *feat_desc;
+	const struct cscfg_config_desc *config_desc;
+	struct cscfg_fs_preset *fs_preset;
+	int i, j, val_idx, preset_idx;
+	ssize_t used = 0;
+
+	fs_preset = container_of(to_config_group(item), struct cscfg_fs_preset, group);
+	config_desc = fs_preset->config_desc;
+
+	if (!config_desc->nr_presets)
+		return 0;
+
+	preset_idx = fs_preset->preset_num - 1;
+
+	/* start index on the correct array line */
+	val_idx = config_desc->nr_total_params * preset_idx;
+
+	/*
+	 * A set of presets is the sum of all params in used features,
+	 * in order of declaration of features and params in the features
+	 */
+	for (i = 0; i < config_desc->nr_feat_refs; i++) {
+		feat_desc = cscfg_get_named_feat_desc(config_desc->feat_ref_names[i]);
+		for (j = 0; j < feat_desc->nr_params; j++) {
+			used += scnprintf(page + used, PAGE_SIZE - used,
+					  "%s.%s = 0x%llx ",
+					  feat_desc->name,
+					  feat_desc->params_desc[j].name,
+					  config_desc->presets[val_idx++]);
+		}
+	}
+	used += scnprintf(page + used, PAGE_SIZE - used, "\n");
+
+	return used;
+}
+CONFIGFS_ATTR_RO(cscfg_cfg_, values);
+
+static ssize_t cscfg_cfg_enable_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+							 struct cscfg_fs_config, group);
+
+	return scnprintf(page, PAGE_SIZE, "%d\n", fs_config->active);
+}
+
+static ssize_t cscfg_cfg_enable_store(struct config_item *item,
+					const char *page, size_t count)
+{
+	struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+							 struct cscfg_fs_config, group);
+	int err;
+	bool val;
+
+	err = kstrtobool(page, &val);
+	if (!err)
+		err = cscfg_config_sysfs_activate(fs_config->config_desc, val);
+	if (!err) {
+		fs_config->active = val;
+		if (val)
+			cscfg_config_sysfs_set_preset(fs_config->preset);
+	}
+	return err ? err : count;
+}
+CONFIGFS_ATTR(cscfg_cfg_, enable);
+
+static ssize_t cscfg_cfg_preset_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+							 struct cscfg_fs_config, group);
+
+	return scnprintf(page, PAGE_SIZE, "%d\n", fs_config->preset);
+}
+
+static ssize_t cscfg_cfg_preset_store(struct config_item *item,
+					     const char *page, size_t count)
+{
+	struct cscfg_fs_config *fs_config = container_of(to_config_group(item),
+							 struct cscfg_fs_config, group);
+	int preset, err;
+
+	err = kstrtoint(page, 0, &preset);
+	if (!err) {
+		/*
+		 * presets start at 1, and go up to max (15),
+		 * but the config may provide fewer.
+		 */
+		if ((preset < 1) || (preset > fs_config->config_desc->nr_presets))
+			err = -EINVAL;
+	}
+
+	if (!err) {
+		/* set new value */
+		fs_config->preset = preset;
+		/* set on system if active */
+		if (fs_config->active)
+			cscfg_config_sysfs_set_preset(fs_config->preset);
+	}
+	return err ? err : count;
+}
+CONFIGFS_ATTR(cscfg_cfg_, preset);
+
+static struct configfs_attribute *cscfg_config_view_attrs[] = {
+	&cscfg_cfg_attr_description,
+	&cscfg_cfg_attr_feature_refs,
+	&cscfg_cfg_attr_enable,
+	&cscfg_cfg_attr_preset,
+	NULL,
+};
+
+static struct config_item_type cscfg_config_view_type = {
+	.ct_owner = THIS_MODULE,
+	.ct_attrs = cscfg_config_view_attrs,
+};
+
+static struct configfs_attribute *cscfg_config_preset_attrs[] = {
+	&cscfg_cfg_attr_values,
+	NULL,
+};
+
+static struct config_item_type cscfg_config_preset_type = {
+	.ct_owner = THIS_MODULE,
+	.ct_attrs = cscfg_config_preset_attrs,
+};
+
+static int cscfg_add_preset_groups(struct cscfg_fs_config *cfg_view)
+{
+	int preset_num;
+	struct cscfg_fs_preset *cfg_fs_preset;
+	struct cscfg_config_desc *config_desc = cfg_view->config_desc;
+	char name[CONFIGFS_ITEM_NAME_LEN];
+
+	if (!config_desc->nr_presets)
+		return 0;
+
+	for (preset_num = 1; preset_num <= config_desc->nr_presets; preset_num++) {
+		cfg_fs_preset = devm_kzalloc(cscfg_device(),
+					     sizeof(struct cscfg_fs_preset), GFP_KERNEL);
+
+		if (!cfg_fs_preset)
+			return -ENOMEM;
+
+		snprintf(name, CONFIGFS_ITEM_NAME_LEN, "preset%d", preset_num);
+		cfg_fs_preset->preset_num = preset_num;
+		cfg_fs_preset->config_desc = cfg_view->config_desc;
+		config_group_init_type_name(&cfg_fs_preset->group, name,
+					    &cscfg_config_preset_type);
+		configfs_add_default_group(&cfg_fs_preset->group, &cfg_view->group);
+	}
+	return 0;
+}
+
+static struct config_group *cscfg_create_config_group(struct cscfg_config_desc *config_desc)
+{
+	struct cscfg_fs_config *cfg_view;
+	struct device *dev = cscfg_device();
+	int err;
+
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	cfg_view = devm_kzalloc(dev, sizeof(struct cscfg_fs_config), GFP_KERNEL);
+	if (!cfg_view)
+		return ERR_PTR(-ENOMEM);
+
+	cfg_view->config_desc = config_desc;
+	config_group_init_type_name(&cfg_view->group, config_desc->name, &cscfg_config_view_type);
+
+	/* add in a preset<n> dir for each preset */
+	err = cscfg_add_preset_groups(cfg_view);
+	if (err)
+		return ERR_PTR(err);
+
+	return &cfg_view->group;
+}
+
+/* attributes for features view */
+
+static ssize_t cscfg_feat_description_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_feature *fs_feat = container_of(to_config_group(item),
+							struct cscfg_fs_feature, group);
+
+	return scnprintf(page, PAGE_SIZE, "%s", fs_feat->feat_desc->description);
+}
+CONFIGFS_ATTR_RO(cscfg_feat_, description);
+
+static ssize_t cscfg_feat_matches_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_feature *fs_feat = container_of(to_config_group(item),
+							struct cscfg_fs_feature, group);
+	u32 match_flags = fs_feat->feat_desc->match_flags;
+	int used = 0;
+
+	if (match_flags & CS_CFG_MATCH_CLASS_SRC_ALL)
+		used = scnprintf(page, PAGE_SIZE, "SRC_ALL ");
+
+	if (match_flags & CS_CFG_MATCH_CLASS_SRC_ETM4)
+		used += scnprintf(page + used, PAGE_SIZE - used, "SRC_ETMV4 ");
+
+	used += scnprintf(page + used, PAGE_SIZE - used, "\n");
+	return used;
+}
+CONFIGFS_ATTR_RO(cscfg_feat_, matches);
+
+static ssize_t cscfg_feat_nr_params_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_feature *fs_feat = container_of(to_config_group(item),
+							struct cscfg_fs_feature, group);
+
+	return scnprintf(page, PAGE_SIZE, "%d\n", fs_feat->feat_desc->nr_params);
+}
+CONFIGFS_ATTR_RO(cscfg_feat_, nr_params);
+
+/* base feature desc attrib structures */
+static struct configfs_attribute *cscfg_feature_view_attrs[] = {
+	&cscfg_feat_attr_description,
+	&cscfg_feat_attr_matches,
+	&cscfg_feat_attr_nr_params,
+	NULL,
+};
+
+static struct config_item_type cscfg_feature_view_type = {
+	.ct_owner = THIS_MODULE,
+	.ct_attrs = cscfg_feature_view_attrs,
+};
+
+static ssize_t cscfg_param_value_show(struct config_item *item, char *page)
+{
+	struct cscfg_fs_param *param_item = container_of(to_config_group(item),
+							 struct cscfg_fs_param, group);
+	u64 value = param_item->feat_desc->params_desc[param_item->param_idx].value;
+
+	return scnprintf(page, PAGE_SIZE, "0x%llx\n", value);
+}
+
+static ssize_t cscfg_param_value_store(struct config_item *item,
+				       const char *page, size_t size)
+{
+	struct cscfg_fs_param *param_item = container_of(to_config_group(item),
+							 struct cscfg_fs_param, group);
+	struct cscfg_feature_desc *feat_desc = param_item->feat_desc;
+	int param_idx = param_item->param_idx;
+	u64 value;
+	int err;
+
+	err = kstrtoull(page, 0, &value);
+	if (!err)
+		err = cscfg_update_feat_param_val(feat_desc, param_idx, value);
+
+	return err ? err : size;
+}
+CONFIGFS_ATTR(cscfg_param_, value);
+
+static struct configfs_attribute *cscfg_param_view_attrs[] = {
+	&cscfg_param_attr_value,
+	NULL,
+};
+
+static struct config_item_type cscfg_param_view_type = {
+	.ct_owner = THIS_MODULE,
+	.ct_attrs = cscfg_param_view_attrs,
+};
+
+/*
+ * configfs has far less functionality provided to add attributes dynamically than sysfs,
+ * and the show and store fns pass the enclosing config_item so the actual attribute cannot
+ * be determined. Therefore we add each item as a group directory, with a value attribute.
+ */
+static int cscfg_create_params_group_items(struct cscfg_feature_desc *feat_desc,
+					   struct config_group *params_group)
+{
+	struct device *dev = cscfg_device();
+	struct cscfg_fs_param *param_item;
+	int i;
+
+	/* parameter items - as groups with default_value attribute */
+	for (i = 0; i < feat_desc->nr_params; i++) {
+		param_item = devm_kzalloc(dev, sizeof(struct cscfg_fs_param), GFP_KERNEL);
+		if (!param_item)
+			return -ENOMEM;
+		param_item->feat_desc = feat_desc;
+		param_item->param_idx = i;
+		config_group_init_type_name(&param_item->group,
+					    feat_desc->params_desc[i].name,
+					    &cscfg_param_view_type);
+		configfs_add_default_group(&param_item->group, params_group);
+	}
+	return 0;
+}
+
+static struct config_group *cscfg_create_feature_group(struct cscfg_feature_desc *feat_desc)
+{
+	struct cscfg_fs_feature *feat_view;
+	struct config_item_type *params_group_type;
+	struct config_group *params_group = NULL;
+	struct device *dev = cscfg_device();
+	int item_err;
+
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	feat_view = devm_kzalloc(dev, sizeof(struct cscfg_fs_feature), GFP_KERNEL);
+	if (!feat_view)
+		return ERR_PTR(-ENOMEM);
+
+	if (feat_desc->nr_params) {
+		params_group = devm_kzalloc(dev, sizeof(struct config_group), GFP_KERNEL);
+		if (!params_group)
+			return ERR_PTR(-ENOMEM);
+
+		params_group_type = cscfg_create_ci_type();
+		if (!params_group_type)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	feat_view->feat_desc = feat_desc;
+	config_group_init_type_name(&feat_view->group,
+				    feat_desc->name,
+				    &cscfg_feature_view_type);
+	if (params_group) {
+		config_group_init_type_name(params_group, "params", params_group_type);
+		configfs_add_default_group(params_group, &feat_view->group);
+		item_err = cscfg_create_params_group_items(feat_desc, params_group);
+		if (item_err)
+			return ERR_PTR(item_err);
+	}
+	return &feat_view->group;
+}
+
+static struct config_item_type cscfg_configs_type = {
+	.ct_owner = THIS_MODULE,
+};
+
+static struct config_group cscfg_configs_grp = {
+	.cg_item = {
+		.ci_namebuf = "configurations",
+		.ci_type = &cscfg_configs_type,
+	},
+};
+
+/* add configuration to configurations group */
+int cscfg_configfs_add_config(struct cscfg_config_desc *config_desc)
+{
+	struct config_group *new_group;
+	int err;
+
+	new_group = cscfg_create_config_group(config_desc);
+	if (IS_ERR(new_group))
+		return PTR_ERR(new_group);
+	err =  configfs_register_group(&cscfg_configs_grp, new_group);
+	if (!err)
+		config_desc->fs_group = new_group;
+	return err;
+}
+
+void cscfg_configfs_del_config(struct cscfg_config_desc *config_desc)
+{
+	if (config_desc->fs_group) {
+		configfs_unregister_group(config_desc->fs_group);
+		config_desc->fs_group = NULL;
+	}
+}
+
+static struct config_item_type cscfg_features_type = {
+	.ct_owner = THIS_MODULE,
+};
+
+static struct config_group cscfg_features_grp = {
+	.cg_item = {
+		.ci_namebuf = "features",
+		.ci_type = &cscfg_features_type,
+	},
+};
+
+/* add feature to features group */
+int cscfg_configfs_add_feature(struct cscfg_feature_desc *feat_desc)
+{
+	struct config_group *new_group;
+	int err;
+
+	new_group = cscfg_create_feature_group(feat_desc);
+	if (IS_ERR(new_group))
+		return PTR_ERR(new_group);
+	err =  configfs_register_group(&cscfg_features_grp, new_group);
+	if (!err)
+		feat_desc->fs_group = new_group;
+	return err;
+}
+
+void cscfg_configfs_del_feature(struct cscfg_feature_desc *feat_desc)
+{
+	if (feat_desc->fs_group) {
+		configfs_unregister_group(feat_desc->fs_group);
+		feat_desc->fs_group = NULL;
+	}
+}
+
+int cscfg_configfs_init(struct cscfg_manager *cscfg_mgr)
+{
+	struct configfs_subsystem *subsys;
+	struct config_item_type *ci_type;
+
+	if (!cscfg_mgr)
+		return -EINVAL;
+
+	ci_type = cscfg_create_ci_type();
+	if (!ci_type)
+		return -ENOMEM;
+
+	subsys = &cscfg_mgr->cfgfs_subsys;
+	config_item_set_name(&subsys->su_group.cg_item, CSCFG_FS_SUBSYS_NAME);
+	subsys->su_group.cg_item.ci_type = ci_type;
+
+	config_group_init(&subsys->su_group);
+	mutex_init(&subsys->su_mutex);
+
+	/* Add default groups to subsystem */
+	config_group_init(&cscfg_configs_grp);
+	configfs_add_default_group(&cscfg_configs_grp, &subsys->su_group);
+
+	config_group_init(&cscfg_features_grp);
+	configfs_add_default_group(&cscfg_features_grp, &subsys->su_group);
+
+	return configfs_register_subsystem(subsys);
+}
+
+void cscfg_configfs_release(struct cscfg_manager *cscfg_mgr)
+{
+	configfs_unregister_subsystem(&cscfg_mgr->cfgfs_subsys);
+}
diff --git a/drivers/hwtracing/coresight/coresight-syscfg-configfs.h b/drivers/hwtracing/coresight/coresight-syscfg-configfs.h
new file mode 100644
index 0000000000..373d84d432
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-syscfg-configfs.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Coresight system configuration driver - support for configfs.
+ */
+
+#ifndef CORESIGHT_SYSCFG_CONFIGFS_H
+#define CORESIGHT_SYSCFG_CONFIGFS_H
+
+#include <linux/configfs.h>
+#include "coresight-syscfg.h"
+
+#define CSCFG_FS_SUBSYS_NAME "cs-syscfg"
+
+/* container for configuration view */
+struct cscfg_fs_config {
+	struct cscfg_config_desc *config_desc;
+	struct config_group group;
+	bool active;
+	int preset;
+};
+
+/* container for feature view */
+struct cscfg_fs_feature {
+	struct cscfg_feature_desc *feat_desc;
+	struct config_group group;
+};
+
+/* container for parameter view */
+struct cscfg_fs_param {
+	int param_idx;
+	struct cscfg_feature_desc *feat_desc;
+	struct config_group group;
+};
+
+/* container for preset view */
+struct cscfg_fs_preset {
+	int preset_num;
+	struct cscfg_config_desc *config_desc;
+	struct config_group group;
+};
+
+int cscfg_configfs_init(struct cscfg_manager *cscfg_mgr);
+void cscfg_configfs_release(struct cscfg_manager *cscfg_mgr);
+int cscfg_configfs_add_config(struct cscfg_config_desc *config_desc);
+int cscfg_configfs_add_feature(struct cscfg_feature_desc *feat_desc);
+void cscfg_configfs_del_config(struct cscfg_config_desc *config_desc);
+void cscfg_configfs_del_feature(struct cscfg_feature_desc *feat_desc);
+
+#endif /* CORESIGHT_SYSCFG_CONFIGFS_H */
diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c
new file mode 100644
index 0000000000..11138a9762
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-syscfg.c
@@ -0,0 +1,1298 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "coresight-config.h"
+#include "coresight-etm-perf.h"
+#include "coresight-syscfg.h"
+#include "coresight-syscfg-configfs.h"
+
+/*
+ * cscfg_ API manages configurations and features for the entire coresight
+ * infrastructure.
+ *
+ * It allows the loading of configurations and features, and loads these into
+ * coresight devices as appropriate.
+ */
+
+/* protect the cscsg_data and device */
+static DEFINE_MUTEX(cscfg_mutex);
+
+/* only one of these */
+static struct cscfg_manager *cscfg_mgr;
+
+/* load features and configuations into the lists */
+
+/* get name feature instance from a coresight device list of features */
+static struct cscfg_feature_csdev *
+cscfg_get_feat_csdev(struct coresight_device *csdev, const char *name)
+{
+	struct cscfg_feature_csdev *feat_csdev = NULL;
+
+	list_for_each_entry(feat_csdev, &csdev->feature_csdev_list, node) {
+		if (strcmp(feat_csdev->feat_desc->name, name) == 0)
+			return feat_csdev;
+	}
+	return NULL;
+}
+
+/* allocate the device config instance - with max number of used features */
+static struct cscfg_config_csdev *
+cscfg_alloc_csdev_cfg(struct coresight_device *csdev, int nr_feats)
+{
+	struct cscfg_config_csdev *config_csdev = NULL;
+	struct device *dev = csdev->dev.parent;
+
+	/* this is being allocated using the devm for the coresight device */
+	config_csdev = devm_kzalloc(dev,
+				    offsetof(struct cscfg_config_csdev, feats_csdev[nr_feats]),
+				    GFP_KERNEL);
+	if (!config_csdev)
+		return NULL;
+
+	config_csdev->csdev = csdev;
+	return config_csdev;
+}
+
+/* Load a config into a device if there are any feature matches between config and device */
+static int cscfg_add_csdev_cfg(struct coresight_device *csdev,
+			       struct cscfg_config_desc *config_desc)
+{
+	struct cscfg_config_csdev *config_csdev = NULL;
+	struct cscfg_feature_csdev *feat_csdev;
+	unsigned long flags;
+	int i;
+
+	/* look at each required feature and see if it matches any feature on the device */
+	for (i = 0; i < config_desc->nr_feat_refs; i++) {
+		/* look for a matching name */
+		feat_csdev = cscfg_get_feat_csdev(csdev, config_desc->feat_ref_names[i]);
+		if (feat_csdev) {
+			/*
+			 * At least one feature on this device matches the config
+			 * add a config instance to the device and a reference to the feature.
+			 */
+			if (!config_csdev) {
+				config_csdev = cscfg_alloc_csdev_cfg(csdev,
+								     config_desc->nr_feat_refs);
+				if (!config_csdev)
+					return -ENOMEM;
+				config_csdev->config_desc = config_desc;
+			}
+			config_csdev->feats_csdev[config_csdev->nr_feat++] = feat_csdev;
+		}
+	}
+	/* if matched features, add config to device.*/
+	if (config_csdev) {
+		spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+		list_add(&config_csdev->node, &csdev->config_csdev_list);
+		spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+	}
+
+	return 0;
+}
+
+/*
+ * Add the config to the set of registered devices - call with mutex locked.
+ * Iterates through devices - any device that matches one or more of the
+ * configuration features will load it, the others will ignore it.
+ */
+static int cscfg_add_cfg_to_csdevs(struct cscfg_config_desc *config_desc)
+{
+	struct cscfg_registered_csdev *csdev_item;
+	int err;
+
+	list_for_each_entry(csdev_item, &cscfg_mgr->csdev_desc_list, item) {
+		err = cscfg_add_csdev_cfg(csdev_item->csdev, config_desc);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Allocate a feature object for load into a csdev.
+ * memory allocated using the csdev->dev object using devm managed allocator.
+ */
+static struct cscfg_feature_csdev *
+cscfg_alloc_csdev_feat(struct coresight_device *csdev, struct cscfg_feature_desc *feat_desc)
+{
+	struct cscfg_feature_csdev *feat_csdev = NULL;
+	struct device *dev = csdev->dev.parent;
+	int i;
+
+	feat_csdev = devm_kzalloc(dev, sizeof(struct cscfg_feature_csdev), GFP_KERNEL);
+	if (!feat_csdev)
+		return NULL;
+
+	/* parameters are optional - could be 0 */
+	feat_csdev->nr_params = feat_desc->nr_params;
+
+	/*
+	 * if we need parameters, zero alloc the space here, the load routine in
+	 * the csdev device driver will fill out some information according to
+	 * feature descriptor.
+	 */
+	if (feat_csdev->nr_params) {
+		feat_csdev->params_csdev = devm_kcalloc(dev, feat_csdev->nr_params,
+							sizeof(struct cscfg_parameter_csdev),
+							GFP_KERNEL);
+		if (!feat_csdev->params_csdev)
+			return NULL;
+
+		/*
+		 * fill in the feature reference in the param - other fields
+		 * handled by loader in csdev.
+		 */
+		for (i = 0; i < feat_csdev->nr_params; i++)
+			feat_csdev->params_csdev[i].feat_csdev = feat_csdev;
+	}
+
+	/*
+	 * Always have registers to program - again the load routine in csdev device
+	 * will fill out according to feature descriptor and device requirements.
+	 */
+	feat_csdev->nr_regs = feat_desc->nr_regs;
+	feat_csdev->regs_csdev = devm_kcalloc(dev, feat_csdev->nr_regs,
+					      sizeof(struct cscfg_regval_csdev),
+					      GFP_KERNEL);
+	if (!feat_csdev->regs_csdev)
+		return NULL;
+
+	/* load the feature default values */
+	feat_csdev->feat_desc = feat_desc;
+	feat_csdev->csdev = csdev;
+
+	return feat_csdev;
+}
+
+/* load one feature into one coresight device */
+static int cscfg_load_feat_csdev(struct coresight_device *csdev,
+				 struct cscfg_feature_desc *feat_desc,
+				 struct cscfg_csdev_feat_ops *ops)
+{
+	struct cscfg_feature_csdev *feat_csdev;
+	unsigned long flags;
+	int err;
+
+	if (!ops->load_feat)
+		return -EINVAL;
+
+	feat_csdev = cscfg_alloc_csdev_feat(csdev, feat_desc);
+	if (!feat_csdev)
+		return -ENOMEM;
+
+	/* load the feature into the device */
+	err = ops->load_feat(csdev, feat_csdev);
+	if (err)
+		return err;
+
+	/* add to internal csdev feature list & initialise using reset call */
+	cscfg_reset_feat(feat_csdev);
+	spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+	list_add(&feat_csdev->node, &csdev->feature_csdev_list);
+	spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+
+	return 0;
+}
+
+/*
+ * Add feature to any matching devices - call with mutex locked.
+ * Iterates through devices - any device that matches the feature will be
+ * called to load it.
+ */
+static int cscfg_add_feat_to_csdevs(struct cscfg_feature_desc *feat_desc)
+{
+	struct cscfg_registered_csdev *csdev_item;
+	int err;
+
+	list_for_each_entry(csdev_item, &cscfg_mgr->csdev_desc_list, item) {
+		if (csdev_item->match_flags & feat_desc->match_flags) {
+			err = cscfg_load_feat_csdev(csdev_item->csdev, feat_desc, &csdev_item->ops);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
+/* check feature list for a named feature - call with mutex locked. */
+static bool cscfg_match_list_feat(const char *name)
+{
+	struct cscfg_feature_desc *feat_desc;
+
+	list_for_each_entry(feat_desc, &cscfg_mgr->feat_desc_list, item) {
+		if (strcmp(feat_desc->name, name) == 0)
+			return true;
+	}
+	return false;
+}
+
+/* check all feat needed for cfg are in the list - call with mutex locked. */
+static int cscfg_check_feat_for_cfg(struct cscfg_config_desc *config_desc)
+{
+	int i;
+
+	for (i = 0; i < config_desc->nr_feat_refs; i++)
+		if (!cscfg_match_list_feat(config_desc->feat_ref_names[i]))
+			return -EINVAL;
+	return 0;
+}
+
+/*
+ * load feature - add to feature list.
+ */
+static int cscfg_load_feat(struct cscfg_feature_desc *feat_desc)
+{
+	int err;
+	struct cscfg_feature_desc *feat_desc_exist;
+
+	/* new feature must have unique name */
+	list_for_each_entry(feat_desc_exist, &cscfg_mgr->feat_desc_list, item) {
+		if (!strcmp(feat_desc_exist->name, feat_desc->name))
+			return -EEXIST;
+	}
+
+	/* add feature to any matching registered devices */
+	err = cscfg_add_feat_to_csdevs(feat_desc);
+	if (err)
+		return err;
+
+	list_add(&feat_desc->item, &cscfg_mgr->feat_desc_list);
+	return 0;
+}
+
+/*
+ * load config into the system - validate used features exist then add to
+ * config list.
+ */
+static int cscfg_load_config(struct cscfg_config_desc *config_desc)
+{
+	int err;
+	struct cscfg_config_desc *config_desc_exist;
+
+	/* new configuration must have a unique name */
+	list_for_each_entry(config_desc_exist, &cscfg_mgr->config_desc_list, item) {
+		if (!strcmp(config_desc_exist->name, config_desc->name))
+			return -EEXIST;
+	}
+
+	/* validate features are present */
+	err = cscfg_check_feat_for_cfg(config_desc);
+	if (err)
+		return err;
+
+	/* add config to any matching registered device */
+	err = cscfg_add_cfg_to_csdevs(config_desc);
+	if (err)
+		return err;
+
+	/* add config to perf fs to allow selection */
+	err = etm_perf_add_symlink_cscfg(cscfg_device(), config_desc);
+	if (err)
+		return err;
+
+	list_add(&config_desc->item, &cscfg_mgr->config_desc_list);
+	atomic_set(&config_desc->active_cnt, 0);
+	return 0;
+}
+
+/* get a feature descriptor by name */
+const struct cscfg_feature_desc *cscfg_get_named_feat_desc(const char *name)
+{
+	const struct cscfg_feature_desc *feat_desc = NULL, *feat_desc_item;
+
+	mutex_lock(&cscfg_mutex);
+
+	list_for_each_entry(feat_desc_item, &cscfg_mgr->feat_desc_list, item) {
+		if (strcmp(feat_desc_item->name, name) == 0) {
+			feat_desc = feat_desc_item;
+			break;
+		}
+	}
+
+	mutex_unlock(&cscfg_mutex);
+	return feat_desc;
+}
+
+/* called with cscfg_mutex held */
+static struct cscfg_feature_csdev *
+cscfg_csdev_get_feat_from_desc(struct coresight_device *csdev,
+			       struct cscfg_feature_desc *feat_desc)
+{
+	struct cscfg_feature_csdev *feat_csdev;
+
+	list_for_each_entry(feat_csdev, &csdev->feature_csdev_list, node) {
+		if (feat_csdev->feat_desc == feat_desc)
+			return feat_csdev;
+	}
+	return NULL;
+}
+
+int cscfg_update_feat_param_val(struct cscfg_feature_desc *feat_desc,
+				int param_idx, u64 value)
+{
+	int err = 0;
+	struct cscfg_feature_csdev *feat_csdev;
+	struct cscfg_registered_csdev *csdev_item;
+
+	mutex_lock(&cscfg_mutex);
+
+	/* check if any config active & return busy */
+	if (atomic_read(&cscfg_mgr->sys_active_cnt)) {
+		err = -EBUSY;
+		goto unlock_exit;
+	}
+
+	/* set the value */
+	if ((param_idx < 0) || (param_idx >= feat_desc->nr_params)) {
+		err = -EINVAL;
+		goto unlock_exit;
+	}
+	feat_desc->params_desc[param_idx].value = value;
+
+	/* update loaded instances.*/
+	list_for_each_entry(csdev_item, &cscfg_mgr->csdev_desc_list, item) {
+		feat_csdev = cscfg_csdev_get_feat_from_desc(csdev_item->csdev, feat_desc);
+		if (feat_csdev)
+			feat_csdev->params_csdev[param_idx].current_value = value;
+	}
+
+unlock_exit:
+	mutex_unlock(&cscfg_mutex);
+	return err;
+}
+
+/*
+ * Conditionally up reference count on owner to prevent unload.
+ *
+ * module loaded configs need to be locked in to prevent premature unload.
+ */
+static int cscfg_owner_get(struct cscfg_load_owner_info *owner_info)
+{
+	if ((owner_info->type == CSCFG_OWNER_MODULE) &&
+	    (!try_module_get(owner_info->owner_handle)))
+		return -EINVAL;
+	return 0;
+}
+
+/* conditionally lower ref count on an owner */
+static void cscfg_owner_put(struct cscfg_load_owner_info *owner_info)
+{
+	if (owner_info->type == CSCFG_OWNER_MODULE)
+		module_put(owner_info->owner_handle);
+}
+
+static void cscfg_remove_owned_csdev_configs(struct coresight_device *csdev, void *load_owner)
+{
+	struct cscfg_config_csdev *config_csdev, *tmp;
+
+	if (list_empty(&csdev->config_csdev_list))
+		return;
+
+	list_for_each_entry_safe(config_csdev, tmp, &csdev->config_csdev_list, node) {
+		if (config_csdev->config_desc->load_owner == load_owner)
+			list_del(&config_csdev->node);
+	}
+}
+
+static void cscfg_remove_owned_csdev_features(struct coresight_device *csdev, void *load_owner)
+{
+	struct cscfg_feature_csdev *feat_csdev, *tmp;
+
+	if (list_empty(&csdev->feature_csdev_list))
+		return;
+
+	list_for_each_entry_safe(feat_csdev, tmp, &csdev->feature_csdev_list, node) {
+		if (feat_csdev->feat_desc->load_owner == load_owner)
+			list_del(&feat_csdev->node);
+	}
+}
+
+/*
+ * Unregister all configuration and features from configfs owned by load_owner.
+ * Although this is called without the list mutex being held, it is in the
+ * context of an unload operation which are strictly serialised,
+ * so the lists cannot change during this call.
+ */
+static void cscfg_fs_unregister_cfgs_feats(void *load_owner)
+{
+	struct cscfg_config_desc *config_desc;
+	struct cscfg_feature_desc *feat_desc;
+
+	list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) {
+		if (config_desc->load_owner == load_owner)
+			cscfg_configfs_del_config(config_desc);
+	}
+	list_for_each_entry(feat_desc, &cscfg_mgr->feat_desc_list, item) {
+		if (feat_desc->load_owner == load_owner)
+			cscfg_configfs_del_feature(feat_desc);
+	}
+}
+
+/*
+ * removal is relatively easy - just remove from all lists, anything that
+ * matches the owner. Memory for the descriptors will be managed by the owner,
+ * memory for the csdev items is devm_ allocated with the individual csdev
+ * devices.
+ */
+static void cscfg_unload_owned_cfgs_feats(void *load_owner)
+{
+	struct cscfg_config_desc *config_desc, *cfg_tmp;
+	struct cscfg_feature_desc *feat_desc, *feat_tmp;
+	struct cscfg_registered_csdev *csdev_item;
+
+	lockdep_assert_held(&cscfg_mutex);
+
+	/* remove from each csdev instance feature and config lists */
+	list_for_each_entry(csdev_item, &cscfg_mgr->csdev_desc_list, item) {
+		/*
+		 * for each csdev, check the loaded lists and remove if
+		 * referenced descriptor is owned
+		 */
+		cscfg_remove_owned_csdev_configs(csdev_item->csdev, load_owner);
+		cscfg_remove_owned_csdev_features(csdev_item->csdev, load_owner);
+	}
+
+	/* remove from the config descriptor lists */
+	list_for_each_entry_safe(config_desc, cfg_tmp, &cscfg_mgr->config_desc_list, item) {
+		if (config_desc->load_owner == load_owner) {
+			etm_perf_del_symlink_cscfg(config_desc);
+			list_del(&config_desc->item);
+		}
+	}
+
+	/* remove from the feature descriptor lists */
+	list_for_each_entry_safe(feat_desc, feat_tmp, &cscfg_mgr->feat_desc_list, item) {
+		if (feat_desc->load_owner == load_owner) {
+			list_del(&feat_desc->item);
+		}
+	}
+}
+
+/*
+ * load the features and configs to the lists - called with list mutex held
+ */
+static int cscfg_load_owned_cfgs_feats(struct cscfg_config_desc **config_descs,
+				       struct cscfg_feature_desc **feat_descs,
+				       struct cscfg_load_owner_info *owner_info)
+{
+	int i, err;
+
+	lockdep_assert_held(&cscfg_mutex);
+
+	/* load features first */
+	if (feat_descs) {
+		for (i = 0; feat_descs[i]; i++) {
+			err = cscfg_load_feat(feat_descs[i]);
+			if (err) {
+				pr_err("coresight-syscfg: Failed to load feature %s\n",
+				       feat_descs[i]->name);
+				return err;
+			}
+			feat_descs[i]->load_owner = owner_info;
+		}
+	}
+
+	/* next any configurations to check feature dependencies */
+	if (config_descs) {
+		for (i = 0; config_descs[i]; i++) {
+			err = cscfg_load_config(config_descs[i]);
+			if (err) {
+				pr_err("coresight-syscfg: Failed to load configuration %s\n",
+				       config_descs[i]->name);
+				return err;
+			}
+			config_descs[i]->load_owner = owner_info;
+			config_descs[i]->available = false;
+		}
+	}
+	return 0;
+}
+
+/* set configurations as available to activate at the end of the load process */
+static void cscfg_set_configs_available(struct cscfg_config_desc **config_descs)
+{
+	int i;
+
+	lockdep_assert_held(&cscfg_mutex);
+
+	if (config_descs) {
+		for (i = 0; config_descs[i]; i++)
+			config_descs[i]->available = true;
+	}
+}
+
+/*
+ * Create and register each of the configurations and features with configfs.
+ * Called without mutex being held.
+ */
+static int cscfg_fs_register_cfgs_feats(struct cscfg_config_desc **config_descs,
+					struct cscfg_feature_desc **feat_descs)
+{
+	int i, err;
+
+	if (feat_descs) {
+		for (i = 0; feat_descs[i]; i++) {
+			err = cscfg_configfs_add_feature(feat_descs[i]);
+			if (err)
+				return err;
+		}
+	}
+	if (config_descs) {
+		for (i = 0; config_descs[i]; i++) {
+			err = cscfg_configfs_add_config(config_descs[i]);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
+/**
+ * cscfg_load_config_sets - API function to load feature and config sets.
+ *
+ * Take a 0 terminated array of feature descriptors and/or configuration
+ * descriptors and load into the system.
+ * Features are loaded first to ensure configuration dependencies can be met.
+ *
+ * To facilitate dynamic loading and unloading, features and configurations
+ * have a "load_owner", to allow later unload by the same owner. An owner may
+ * be a loadable module or configuration dynamically created via configfs.
+ * As later loaded configurations can use earlier loaded features, creating load
+ * dependencies, a load order list is maintained. Unload is strictly in the
+ * reverse order to load.
+ *
+ * @config_descs: 0 terminated array of configuration descriptors.
+ * @feat_descs:   0 terminated array of feature descriptors.
+ * @owner_info:	  Information on the owner of this set.
+ */
+int cscfg_load_config_sets(struct cscfg_config_desc **config_descs,
+			   struct cscfg_feature_desc **feat_descs,
+			   struct cscfg_load_owner_info *owner_info)
+{
+	int err = 0;
+
+	mutex_lock(&cscfg_mutex);
+	if (cscfg_mgr->load_state != CSCFG_NONE) {
+		mutex_unlock(&cscfg_mutex);
+		return -EBUSY;
+	}
+	cscfg_mgr->load_state = CSCFG_LOAD;
+
+	/* first load and add to the lists */
+	err = cscfg_load_owned_cfgs_feats(config_descs, feat_descs, owner_info);
+	if (err)
+		goto err_clean_load;
+
+	/* add the load owner to the load order list */
+	list_add_tail(&owner_info->item, &cscfg_mgr->load_order_list);
+	if (!list_is_singular(&cscfg_mgr->load_order_list)) {
+		/* lock previous item in load order list */
+		err = cscfg_owner_get(list_prev_entry(owner_info, item));
+		if (err)
+			goto err_clean_owner_list;
+	}
+
+	/*
+	 * make visible to configfs - configfs manipulation must occur outside
+	 * the list mutex lock to avoid circular lockdep issues with configfs
+	 * built in mutexes and semaphores. This is safe as it is not possible
+	 * to start a new load/unload operation till the current one is done.
+	 */
+	mutex_unlock(&cscfg_mutex);
+
+	/* create the configfs elements */
+	err = cscfg_fs_register_cfgs_feats(config_descs, feat_descs);
+	mutex_lock(&cscfg_mutex);
+
+	if (err)
+		goto err_clean_cfs;
+
+	/* mark any new configs as available for activation */
+	cscfg_set_configs_available(config_descs);
+	goto exit_unlock;
+
+err_clean_cfs:
+	/* cleanup after error registering with configfs */
+	cscfg_fs_unregister_cfgs_feats(owner_info);
+
+	if (!list_is_singular(&cscfg_mgr->load_order_list))
+		cscfg_owner_put(list_prev_entry(owner_info, item));
+
+err_clean_owner_list:
+	list_del(&owner_info->item);
+
+err_clean_load:
+	cscfg_unload_owned_cfgs_feats(owner_info);
+
+exit_unlock:
+	cscfg_mgr->load_state = CSCFG_NONE;
+	mutex_unlock(&cscfg_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(cscfg_load_config_sets);
+
+/**
+ * cscfg_unload_config_sets - unload a set of configurations by owner.
+ *
+ * Dynamic unload of configuration and feature sets is done on the basis of
+ * the load owner of that set. Later loaded configurations can depend on
+ * features loaded earlier.
+ *
+ * Therefore, unload is only possible if:-
+ * 1) no configurations are active.
+ * 2) the set being unloaded was the last to be loaded to maintain dependencies.
+ *
+ * Once the unload operation commences, we disallow any configuration being
+ * made active until it is complete.
+ *
+ * @owner_info:	Information on owner for set being unloaded.
+ */
+int cscfg_unload_config_sets(struct cscfg_load_owner_info *owner_info)
+{
+	int err = 0;
+	struct cscfg_load_owner_info *load_list_item = NULL;
+
+	mutex_lock(&cscfg_mutex);
+	if (cscfg_mgr->load_state != CSCFG_NONE) {
+		mutex_unlock(&cscfg_mutex);
+		return -EBUSY;
+	}
+
+	/* unload op in progress also prevents activation of any config */
+	cscfg_mgr->load_state = CSCFG_UNLOAD;
+
+	/* cannot unload if anything is active */
+	if (atomic_read(&cscfg_mgr->sys_active_cnt)) {
+		err = -EBUSY;
+		goto exit_unlock;
+	}
+
+	/* cannot unload if not last loaded in load order */
+	if (!list_empty(&cscfg_mgr->load_order_list)) {
+		load_list_item = list_last_entry(&cscfg_mgr->load_order_list,
+						 struct cscfg_load_owner_info, item);
+		if (load_list_item != owner_info)
+			load_list_item = NULL;
+	}
+
+	if (!load_list_item) {
+		err = -EINVAL;
+		goto exit_unlock;
+	}
+
+	/* remove from configfs - again outside the scope of the list mutex */
+	mutex_unlock(&cscfg_mutex);
+	cscfg_fs_unregister_cfgs_feats(owner_info);
+	mutex_lock(&cscfg_mutex);
+
+	/* unload everything from lists belonging to load_owner */
+	cscfg_unload_owned_cfgs_feats(owner_info);
+
+	/* remove from load order list */
+	if (!list_is_singular(&cscfg_mgr->load_order_list)) {
+		/* unlock previous item in load order list */
+		cscfg_owner_put(list_prev_entry(owner_info, item));
+	}
+	list_del(&owner_info->item);
+
+exit_unlock:
+	cscfg_mgr->load_state = CSCFG_NONE;
+	mutex_unlock(&cscfg_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(cscfg_unload_config_sets);
+
+/* Handle coresight device registration and add configs and features to devices */
+
+/* iterate through config lists and load matching configs to device */
+static int cscfg_add_cfgs_csdev(struct coresight_device *csdev)
+{
+	struct cscfg_config_desc *config_desc;
+	int err = 0;
+
+	list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) {
+		err = cscfg_add_csdev_cfg(csdev, config_desc);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+/* iterate through feature lists and load matching features to device */
+static int cscfg_add_feats_csdev(struct coresight_device *csdev,
+				 u32 match_flags,
+				 struct cscfg_csdev_feat_ops *ops)
+{
+	struct cscfg_feature_desc *feat_desc;
+	int err = 0;
+
+	if (!ops->load_feat)
+		return -EINVAL;
+
+	list_for_each_entry(feat_desc, &cscfg_mgr->feat_desc_list, item) {
+		if (feat_desc->match_flags & match_flags) {
+			err = cscfg_load_feat_csdev(csdev, feat_desc, ops);
+			if (err)
+				break;
+		}
+	}
+	return err;
+}
+
+/* Add coresight device to list and copy its matching info */
+static int cscfg_list_add_csdev(struct coresight_device *csdev,
+				u32 match_flags,
+				struct cscfg_csdev_feat_ops *ops)
+{
+	struct cscfg_registered_csdev *csdev_item;
+
+	/* allocate the list entry structure */
+	csdev_item = kzalloc(sizeof(struct cscfg_registered_csdev), GFP_KERNEL);
+	if (!csdev_item)
+		return -ENOMEM;
+
+	csdev_item->csdev = csdev;
+	csdev_item->match_flags = match_flags;
+	csdev_item->ops.load_feat = ops->load_feat;
+	list_add(&csdev_item->item, &cscfg_mgr->csdev_desc_list);
+
+	INIT_LIST_HEAD(&csdev->feature_csdev_list);
+	INIT_LIST_HEAD(&csdev->config_csdev_list);
+	spin_lock_init(&csdev->cscfg_csdev_lock);
+
+	return 0;
+}
+
+/* remove a coresight device from the list and free data */
+static void cscfg_list_remove_csdev(struct coresight_device *csdev)
+{
+	struct cscfg_registered_csdev *csdev_item, *tmp;
+
+	list_for_each_entry_safe(csdev_item, tmp, &cscfg_mgr->csdev_desc_list, item) {
+		if (csdev_item->csdev == csdev) {
+			list_del(&csdev_item->item);
+			kfree(csdev_item);
+			break;
+		}
+	}
+}
+
+/**
+ * cscfg_register_csdev - register a coresight device with the syscfg manager.
+ *
+ * Registers the coresight device with the system. @match_flags used to check
+ * if the device is a match for registered features. Any currently registered
+ * configurations and features that match the device will be loaded onto it.
+ *
+ * @csdev:		The coresight device to register.
+ * @match_flags:	Matching information to load features.
+ * @ops:		Standard operations supported by the device.
+ */
+int cscfg_register_csdev(struct coresight_device *csdev,
+			 u32 match_flags,
+			 struct cscfg_csdev_feat_ops *ops)
+{
+	int ret = 0;
+
+	mutex_lock(&cscfg_mutex);
+
+	/* add device to list of registered devices  */
+	ret = cscfg_list_add_csdev(csdev, match_flags, ops);
+	if (ret)
+		goto reg_csdev_unlock;
+
+	/* now load any registered features and configs matching the device. */
+	ret = cscfg_add_feats_csdev(csdev, match_flags, ops);
+	if (ret) {
+		cscfg_list_remove_csdev(csdev);
+		goto reg_csdev_unlock;
+	}
+
+	ret = cscfg_add_cfgs_csdev(csdev);
+	if (ret) {
+		cscfg_list_remove_csdev(csdev);
+		goto reg_csdev_unlock;
+	}
+
+	pr_info("CSCFG registered %s", dev_name(&csdev->dev));
+
+reg_csdev_unlock:
+	mutex_unlock(&cscfg_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cscfg_register_csdev);
+
+/**
+ * cscfg_unregister_csdev - remove coresight device from syscfg manager.
+ *
+ * @csdev: Device to remove.
+ */
+void cscfg_unregister_csdev(struct coresight_device *csdev)
+{
+	mutex_lock(&cscfg_mutex);
+	cscfg_list_remove_csdev(csdev);
+	mutex_unlock(&cscfg_mutex);
+}
+EXPORT_SYMBOL_GPL(cscfg_unregister_csdev);
+
+/**
+ * cscfg_csdev_reset_feats - reset features for a CoreSight device.
+ *
+ * Resets all parameters and register values for any features loaded
+ * into @csdev to their default values.
+ *
+ * @csdev: The CoreSight device.
+ */
+void cscfg_csdev_reset_feats(struct coresight_device *csdev)
+{
+	struct cscfg_feature_csdev *feat_csdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+	if (list_empty(&csdev->feature_csdev_list))
+		goto unlock_exit;
+
+	list_for_each_entry(feat_csdev, &csdev->feature_csdev_list, node)
+		cscfg_reset_feat(feat_csdev);
+
+unlock_exit:
+	spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+}
+EXPORT_SYMBOL_GPL(cscfg_csdev_reset_feats);
+
+/*
+ * This activate configuration for either perf or sysfs. Perf can have multiple
+ * active configs, selected per event, sysfs is limited to one.
+ *
+ * Increments the configuration descriptor active count and the global active
+ * count.
+ *
+ * @cfg_hash: Hash value of the selected configuration name.
+ */
+static int _cscfg_activate_config(unsigned long cfg_hash)
+{
+	struct cscfg_config_desc *config_desc;
+	int err = -EINVAL;
+
+	if (cscfg_mgr->load_state == CSCFG_UNLOAD)
+		return -EBUSY;
+
+	list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) {
+		if ((unsigned long)config_desc->event_ea->var == cfg_hash) {
+			/* if we happen upon a partly loaded config, can't use it */
+			if (config_desc->available == false)
+				return -EBUSY;
+
+			/* must ensure that config cannot be unloaded in use */
+			err = cscfg_owner_get(config_desc->load_owner);
+			if (err)
+				break;
+			/*
+			 * increment the global active count - control changes to
+			 * active configurations
+			 */
+			atomic_inc(&cscfg_mgr->sys_active_cnt);
+
+			/*
+			 * mark the descriptor as active so enable config on a
+			 * device instance will use it
+			 */
+			atomic_inc(&config_desc->active_cnt);
+
+			err = 0;
+			dev_dbg(cscfg_device(), "Activate config %s.\n", config_desc->name);
+			break;
+		}
+	}
+	return err;
+}
+
+static void _cscfg_deactivate_config(unsigned long cfg_hash)
+{
+	struct cscfg_config_desc *config_desc;
+
+	list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) {
+		if ((unsigned long)config_desc->event_ea->var == cfg_hash) {
+			atomic_dec(&config_desc->active_cnt);
+			atomic_dec(&cscfg_mgr->sys_active_cnt);
+			cscfg_owner_put(config_desc->load_owner);
+			dev_dbg(cscfg_device(), "Deactivate config %s.\n", config_desc->name);
+			break;
+		}
+	}
+}
+
+/*
+ * called from configfs to set/clear the active configuration for use when
+ * using sysfs to control trace.
+ */
+int cscfg_config_sysfs_activate(struct cscfg_config_desc *config_desc, bool activate)
+{
+	unsigned long cfg_hash;
+	int err = 0;
+
+	mutex_lock(&cscfg_mutex);
+
+	cfg_hash = (unsigned long)config_desc->event_ea->var;
+
+	if (activate) {
+		/* cannot be a current active value to activate this */
+		if (cscfg_mgr->sysfs_active_config) {
+			err = -EBUSY;
+			goto exit_unlock;
+		}
+		err = _cscfg_activate_config(cfg_hash);
+		if (!err)
+			cscfg_mgr->sysfs_active_config = cfg_hash;
+	} else {
+		/* disable if matching current value */
+		if (cscfg_mgr->sysfs_active_config == cfg_hash) {
+			_cscfg_deactivate_config(cfg_hash);
+			cscfg_mgr->sysfs_active_config = 0;
+		} else
+			err = -EINVAL;
+	}
+
+exit_unlock:
+	mutex_unlock(&cscfg_mutex);
+	return err;
+}
+
+/* set the sysfs preset value */
+void cscfg_config_sysfs_set_preset(int preset)
+{
+	mutex_lock(&cscfg_mutex);
+	cscfg_mgr->sysfs_active_preset = preset;
+	mutex_unlock(&cscfg_mutex);
+}
+
+/*
+ * Used by a device to get the config and preset selected as active in configfs,
+ * when using sysfs to control trace.
+ */
+void cscfg_config_sysfs_get_active_cfg(unsigned long *cfg_hash, int *preset)
+{
+	mutex_lock(&cscfg_mutex);
+	*preset = cscfg_mgr->sysfs_active_preset;
+	*cfg_hash = cscfg_mgr->sysfs_active_config;
+	mutex_unlock(&cscfg_mutex);
+}
+EXPORT_SYMBOL_GPL(cscfg_config_sysfs_get_active_cfg);
+
+/**
+ * cscfg_activate_config -  Mark a configuration descriptor as active.
+ *
+ * This will be seen when csdev devices are enabled in the system.
+ * Only activated configurations can be enabled on individual devices.
+ * Activation protects the configuration from alteration or removal while
+ * active.
+ *
+ * Selection by hash value - generated from the configuration name when it
+ * was loaded and added to the cs_etm/configurations file system for selection
+ * by perf.
+ *
+ * @cfg_hash: Hash value of the selected configuration name.
+ */
+int cscfg_activate_config(unsigned long cfg_hash)
+{
+	int err = 0;
+
+	mutex_lock(&cscfg_mutex);
+	err = _cscfg_activate_config(cfg_hash);
+	mutex_unlock(&cscfg_mutex);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(cscfg_activate_config);
+
+/**
+ * cscfg_deactivate_config -  Mark a config descriptor as inactive.
+ *
+ * Decrement the configuration and global active counts.
+ *
+ * @cfg_hash: Hash value of the selected configuration name.
+ */
+void cscfg_deactivate_config(unsigned long cfg_hash)
+{
+	mutex_lock(&cscfg_mutex);
+	_cscfg_deactivate_config(cfg_hash);
+	mutex_unlock(&cscfg_mutex);
+}
+EXPORT_SYMBOL_GPL(cscfg_deactivate_config);
+
+/**
+ * cscfg_csdev_enable_active_config - Enable matching active configuration for device.
+ *
+ * Enables the configuration selected by @cfg_hash if the configuration is supported
+ * on the device and has been activated.
+ *
+ * If active and supported the CoreSight device @csdev will be programmed with the
+ * configuration, using @preset parameters.
+ *
+ * Should be called before driver hardware enable for the requested device, prior to
+ * programming and enabling the physical hardware.
+ *
+ * @csdev:	CoreSight device to program.
+ * @cfg_hash:	Selector for the configuration.
+ * @preset:	Preset parameter values to use, 0 for current / default values.
+ */
+int cscfg_csdev_enable_active_config(struct coresight_device *csdev,
+				     unsigned long cfg_hash, int preset)
+{
+	struct cscfg_config_csdev *config_csdev_active = NULL, *config_csdev_item;
+	const struct cscfg_config_desc *config_desc;
+	unsigned long flags;
+	int err = 0;
+
+	/* quickly check global count */
+	if (!atomic_read(&cscfg_mgr->sys_active_cnt))
+		return 0;
+
+	/*
+	 * Look for matching configuration - set the active configuration
+	 * context if found.
+	 */
+	spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+	list_for_each_entry(config_csdev_item, &csdev->config_csdev_list, node) {
+		config_desc = config_csdev_item->config_desc;
+		if ((atomic_read(&config_desc->active_cnt)) &&
+		    ((unsigned long)config_desc->event_ea->var == cfg_hash)) {
+			config_csdev_active = config_csdev_item;
+			csdev->active_cscfg_ctxt = (void *)config_csdev_active;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+
+	/*
+	 * If found, attempt to enable
+	 */
+	if (config_csdev_active) {
+		/*
+		 * Call the generic routine that will program up the internal
+		 * driver structures prior to programming up the hardware.
+		 * This routine takes the driver spinlock saved in the configs.
+		 */
+		err = cscfg_csdev_enable_config(config_csdev_active, preset);
+		if (!err) {
+			/*
+			 * Successful programming. Check the active_cscfg_ctxt
+			 * pointer to ensure no pre-emption disabled it via
+			 * cscfg_csdev_disable_active_config() before
+			 * we could start.
+			 *
+			 * Set enabled if OK, err if not.
+			 */
+			spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+			if (csdev->active_cscfg_ctxt)
+				config_csdev_active->enabled = true;
+			else
+				err = -EBUSY;
+			spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+		}
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(cscfg_csdev_enable_active_config);
+
+/**
+ * cscfg_csdev_disable_active_config - disable an active config on the device.
+ *
+ * Disables the active configuration on the CoreSight device @csdev.
+ * Disable will save the values of any registers marked in the configurations
+ * as save on disable.
+ *
+ * Should be called after driver hardware disable for the requested device,
+ * after disabling the physical hardware and reading back registers.
+ *
+ * @csdev: The CoreSight device.
+ */
+void cscfg_csdev_disable_active_config(struct coresight_device *csdev)
+{
+	struct cscfg_config_csdev *config_csdev;
+	unsigned long flags;
+
+	/*
+	 * Check if we have an active config, and that it was successfully enabled.
+	 * If it was not enabled, we have no work to do, otherwise mark as disabled.
+	 * Clear the active config pointer.
+	 */
+	spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
+	config_csdev = (struct cscfg_config_csdev *)csdev->active_cscfg_ctxt;
+	if (config_csdev) {
+		if (!config_csdev->enabled)
+			config_csdev = NULL;
+		else
+			config_csdev->enabled = false;
+	}
+	csdev->active_cscfg_ctxt = NULL;
+	spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags);
+
+	/* true if there was an enabled active config */
+	if (config_csdev)
+		cscfg_csdev_disable_config(config_csdev);
+}
+EXPORT_SYMBOL_GPL(cscfg_csdev_disable_active_config);
+
+/* Initialise system configuration management device. */
+
+struct device *cscfg_device(void)
+{
+	return cscfg_mgr ? &cscfg_mgr->dev : NULL;
+}
+
+/* Must have a release function or the kernel will complain on module unload */
+static void cscfg_dev_release(struct device *dev)
+{
+	mutex_lock(&cscfg_mutex);
+	kfree(cscfg_mgr);
+	cscfg_mgr = NULL;
+	mutex_unlock(&cscfg_mutex);
+}
+
+/* a device is needed to "own" some kernel elements such as sysfs entries.  */
+static int cscfg_create_device(void)
+{
+	struct device *dev;
+	int err = -ENOMEM;
+
+	mutex_lock(&cscfg_mutex);
+	if (cscfg_mgr) {
+		err = -EINVAL;
+		goto create_dev_exit_unlock;
+	}
+
+	cscfg_mgr = kzalloc(sizeof(struct cscfg_manager), GFP_KERNEL);
+	if (!cscfg_mgr)
+		goto create_dev_exit_unlock;
+
+	/* initialise the cscfg_mgr structure */
+	INIT_LIST_HEAD(&cscfg_mgr->csdev_desc_list);
+	INIT_LIST_HEAD(&cscfg_mgr->feat_desc_list);
+	INIT_LIST_HEAD(&cscfg_mgr->config_desc_list);
+	INIT_LIST_HEAD(&cscfg_mgr->load_order_list);
+	atomic_set(&cscfg_mgr->sys_active_cnt, 0);
+	cscfg_mgr->load_state = CSCFG_NONE;
+
+	/* setup the device */
+	dev = cscfg_device();
+	dev->release = cscfg_dev_release;
+	dev->init_name = "cs_system_cfg";
+
+	err = device_register(dev);
+	if (err)
+		put_device(dev);
+
+create_dev_exit_unlock:
+	mutex_unlock(&cscfg_mutex);
+	return err;
+}
+
+/*
+ * Loading and unloading is generally on user discretion.
+ * If exiting due to coresight module unload, we need to unload any configurations that remain,
+ * before we unregister the configfs intrastructure.
+ *
+ * Do this by walking the load_owner list and taking appropriate action, depending on the load
+ * owner type.
+ */
+static void cscfg_unload_cfgs_on_exit(void)
+{
+	struct cscfg_load_owner_info *owner_info = NULL;
+
+	/*
+	 * grab the mutex - even though we are exiting, some configfs files
+	 * may still be live till we dump them, so ensure list data is
+	 * protected from a race condition.
+	 */
+	mutex_lock(&cscfg_mutex);
+	while (!list_empty(&cscfg_mgr->load_order_list)) {
+
+		/* remove in reverse order of loading */
+		owner_info = list_last_entry(&cscfg_mgr->load_order_list,
+					     struct cscfg_load_owner_info, item);
+
+		/* action according to type */
+		switch (owner_info->type) {
+		case CSCFG_OWNER_PRELOAD:
+			/*
+			 * preloaded  descriptors are statically allocated in
+			 * this module - just need to unload dynamic items from
+			 * csdev lists, and remove from configfs directories.
+			 */
+			pr_info("cscfg: unloading preloaded configurations\n");
+			break;
+
+		case  CSCFG_OWNER_MODULE:
+			/*
+			 * this is an error - the loadable module must have been unloaded prior
+			 * to the coresight module unload. Therefore that module has not
+			 * correctly unloaded configs in its own exit code.
+			 * Nothing to do other than emit an error string as the static descriptor
+			 * references we need to unload will have disappeared with the module.
+			 */
+			pr_err("cscfg: ERROR: prior module failed to unload configuration\n");
+			goto list_remove;
+		}
+
+		/* remove from configfs - outside the scope of the list mutex */
+		mutex_unlock(&cscfg_mutex);
+		cscfg_fs_unregister_cfgs_feats(owner_info);
+		mutex_lock(&cscfg_mutex);
+
+		/* Next unload from csdev lists. */
+		cscfg_unload_owned_cfgs_feats(owner_info);
+
+list_remove:
+		/* remove from load order list */
+		list_del(&owner_info->item);
+	}
+	mutex_unlock(&cscfg_mutex);
+}
+
+static void cscfg_clear_device(void)
+{
+	cscfg_unload_cfgs_on_exit();
+	cscfg_configfs_release(cscfg_mgr);
+	device_unregister(cscfg_device());
+}
+
+/* Initialise system config management API device  */
+int __init cscfg_init(void)
+{
+	int err = 0;
+
+	/* create the device and init cscfg_mgr */
+	err = cscfg_create_device();
+	if (err)
+		return err;
+
+	/* initialise configfs subsystem */
+	err = cscfg_configfs_init(cscfg_mgr);
+	if (err)
+		goto exit_err;
+
+	/* preload built-in configurations */
+	err = cscfg_preload(THIS_MODULE);
+	if (err)
+		goto exit_err;
+
+	dev_info(cscfg_device(), "CoreSight Configuration manager initialised");
+	return 0;
+
+exit_err:
+	cscfg_clear_device();
+	return err;
+}
+
+void cscfg_exit(void)
+{
+	cscfg_clear_device();
+}
diff --git a/drivers/hwtracing/coresight/coresight-syscfg.h b/drivers/hwtracing/coresight/coresight-syscfg.h
new file mode 100644
index 0000000000..66e2db890d
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-syscfg.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Coresight system configuration driver.
+ */
+
+#ifndef CORESIGHT_SYSCFG_H
+#define CORESIGHT_SYSCFG_H
+
+#include <linux/configfs.h>
+#include <linux/coresight.h>
+#include <linux/device.h>
+
+#include "coresight-config.h"
+
+/*
+ * Load operation types.
+ * When loading or unloading, another load operation cannot be run.
+ * When unloading configurations cannot be activated.
+ */
+enum cscfg_load_ops {
+	CSCFG_NONE,
+	CSCFG_LOAD,
+	CSCFG_UNLOAD
+};
+
+/**
+ * System configuration manager device.
+ *
+ * Contains lists of the loaded configurations and features, plus a list of CoreSight devices
+ * registered with the system as supporting configuration management.
+ *
+ * Need a device to 'own' some coresight system wide sysfs entries in
+ * perf events, configfs etc.
+ *
+ * @dev:		The device.
+ * @csdev_desc_list:	List of coresight devices registered with the configuration manager.
+ * @feat_desc_list:	List of feature descriptors to load into registered devices.
+ * @config_desc_list:	List of system configuration descriptors to load into registered devices.
+ * @load_order_list:    Ordered list of owners for dynamically loaded configurations.
+ * @sys_active_cnt:	Total number of active config descriptor references.
+ * @cfgfs_subsys:	configfs subsystem used to manage configurations.
+ * @sysfs_active_config:Active config hash used if CoreSight controlled from sysfs.
+ * @sysfs_active_preset:Active preset index used if CoreSight controlled from sysfs.
+ * @load_state:		A multi-stage load/unload operation is in progress.
+ */
+struct cscfg_manager {
+	struct device dev;
+	struct list_head csdev_desc_list;
+	struct list_head feat_desc_list;
+	struct list_head config_desc_list;
+	struct list_head load_order_list;
+	atomic_t sys_active_cnt;
+	struct configfs_subsystem cfgfs_subsys;
+	u32 sysfs_active_config;
+	int sysfs_active_preset;
+	enum cscfg_load_ops load_state;
+};
+
+/* get reference to dev in cscfg_manager */
+struct device *cscfg_device(void);
+
+/**
+ * List entry for Coresight devices that are registered as supporting complex
+ * config operations.
+ *
+ * @csdev:	 The registered device.
+ * @match_flags: The matching type information for adding features.
+ * @ops:	 Operations supported by the registered device.
+ * @item:	 list entry.
+ */
+struct cscfg_registered_csdev {
+	struct coresight_device *csdev;
+	u32 match_flags;
+	struct cscfg_csdev_feat_ops ops;
+	struct list_head item;
+};
+
+/* owner types for loading and unloading of config and feature sets */
+enum cscfg_load_owner_type {
+	CSCFG_OWNER_PRELOAD,
+	CSCFG_OWNER_MODULE,
+};
+
+/**
+ * Load item - item to add to the load order list allowing dynamic load and
+ *             unload of configurations and features. Caller loading a config
+ *	       set provides a context handle for unload. API ensures that
+ *	       items unloaded strictly in reverse order from load to ensure
+ *	       dependencies are respected.
+ *
+ * @item:		list entry for load order list.
+ * @type:		type of owner - allows interpretation of owner_handle.
+ * @owner_handle:	load context - handle for owner of loaded configs.
+ */
+struct cscfg_load_owner_info {
+	struct list_head item;
+	int type;
+	void *owner_handle;
+};
+
+/* internal core operations for cscfg */
+int __init cscfg_init(void);
+void cscfg_exit(void);
+int cscfg_preload(void *owner_handle);
+const struct cscfg_feature_desc *cscfg_get_named_feat_desc(const char *name);
+int cscfg_update_feat_param_val(struct cscfg_feature_desc *feat_desc,
+				int param_idx, u64 value);
+int cscfg_config_sysfs_activate(struct cscfg_config_desc *cfg_desc, bool activate);
+void cscfg_config_sysfs_set_preset(int preset);
+
+/* syscfg manager external API */
+int cscfg_load_config_sets(struct cscfg_config_desc **cfg_descs,
+			   struct cscfg_feature_desc **feat_descs,
+			   struct cscfg_load_owner_info *owner_info);
+int cscfg_unload_config_sets(struct cscfg_load_owner_info *owner_info);
+int cscfg_register_csdev(struct coresight_device *csdev, u32 match_flags,
+			 struct cscfg_csdev_feat_ops *ops);
+void cscfg_unregister_csdev(struct coresight_device *csdev);
+int cscfg_activate_config(unsigned long cfg_hash);
+void cscfg_deactivate_config(unsigned long cfg_hash);
+void cscfg_csdev_reset_feats(struct coresight_device *csdev);
+int cscfg_csdev_enable_active_config(struct coresight_device *csdev,
+				     unsigned long cfg_hash, int preset);
+void cscfg_csdev_disable_active_config(struct coresight_device *csdev);
+void cscfg_config_sysfs_get_active_cfg(unsigned long *cfg_hash, int *preset);
+
+#endif /* CORESIGHT_SYSCFG_H */
diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c
new file mode 100644
index 0000000000..dd78e9fcfc
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-sysfs.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019, Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+
+#include "coresight-priv.h"
+
+/*
+ * Connections group - links attribute.
+ * Count of created links between coresight components in the group.
+ */
+static ssize_t nr_links_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	return sprintf(buf, "%d\n", csdev->nr_links);
+}
+static DEVICE_ATTR_RO(nr_links);
+
+static struct attribute *coresight_conns_attrs[] = {
+	&dev_attr_nr_links.attr,
+	NULL,
+};
+
+static struct attribute_group coresight_conns_group = {
+	.attrs = coresight_conns_attrs,
+	.name = "connections",
+};
+
+/*
+ * Create connections group for CoreSight devices.
+ * This group will then be used to collate the sysfs links between
+ * devices.
+ */
+int coresight_create_conns_sysfs_group(struct coresight_device *csdev)
+{
+	int ret = 0;
+
+	if (!csdev)
+		return -EINVAL;
+
+	ret = sysfs_create_group(&csdev->dev.kobj, &coresight_conns_group);
+	if (ret)
+		return ret;
+
+	csdev->has_conns_grp = true;
+	return ret;
+}
+
+void coresight_remove_conns_sysfs_group(struct coresight_device *csdev)
+{
+	if (!csdev)
+		return;
+
+	if (csdev->has_conns_grp) {
+		sysfs_remove_group(&csdev->dev.kobj, &coresight_conns_group);
+		csdev->has_conns_grp = false;
+	}
+}
+
+int coresight_add_sysfs_link(struct coresight_sysfs_link *info)
+{
+	int ret = 0;
+
+	if (!info)
+		return -EINVAL;
+	if (!info->orig || !info->target ||
+	    !info->orig_name || !info->target_name)
+		return -EINVAL;
+	if (!info->orig->has_conns_grp || !info->target->has_conns_grp)
+		return -EINVAL;
+
+	/* first link orig->target */
+	ret = sysfs_add_link_to_group(&info->orig->dev.kobj,
+				      coresight_conns_group.name,
+				      &info->target->dev.kobj,
+				      info->orig_name);
+	if (ret)
+		return ret;
+
+	/* second link target->orig */
+	ret = sysfs_add_link_to_group(&info->target->dev.kobj,
+				      coresight_conns_group.name,
+				      &info->orig->dev.kobj,
+				      info->target_name);
+
+	/* error in second link - remove first - otherwise inc counts */
+	if (ret) {
+		sysfs_remove_link_from_group(&info->orig->dev.kobj,
+					     coresight_conns_group.name,
+					     info->orig_name);
+	} else {
+		info->orig->nr_links++;
+		info->target->nr_links++;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(coresight_add_sysfs_link);
+
+void coresight_remove_sysfs_link(struct coresight_sysfs_link *info)
+{
+	if (!info)
+		return;
+	if (!info->orig || !info->target ||
+	    !info->orig_name || !info->target_name)
+		return;
+
+	sysfs_remove_link_from_group(&info->orig->dev.kobj,
+				     coresight_conns_group.name,
+				     info->orig_name);
+
+	sysfs_remove_link_from_group(&info->target->dev.kobj,
+				     coresight_conns_group.name,
+				     info->target_name);
+
+	info->orig->nr_links--;
+	info->target->nr_links--;
+}
+EXPORT_SYMBOL_GPL(coresight_remove_sysfs_link);
+
+/*
+ * coresight_make_links: Make a link for a connection from a @orig
+ * device to @target, represented by @conn.
+ *
+ *   e.g, for devOrig[output_X] -> devTarget[input_Y] is represented
+ *   as two symbolic links :
+ *
+ *	/sys/.../devOrig/out:X	-> /sys/.../devTarget/
+ *	/sys/.../devTarget/in:Y	-> /sys/.../devOrig/
+ *
+ * The link names are allocated for a device where it appears. i.e, the
+ * "out" link on the master and "in" link on the slave device.
+ * The link info is stored in the connection record for avoiding
+ * the reconstruction of names for removal.
+ */
+int coresight_make_links(struct coresight_device *orig,
+			 struct coresight_connection *conn,
+			 struct coresight_device *target)
+{
+	int ret = -ENOMEM;
+	char *outs = NULL, *ins = NULL;
+	struct coresight_sysfs_link *link = NULL;
+
+	/* Helper devices aren't shown in sysfs */
+	if (conn->dest_port == -1 && conn->src_port == -1)
+		return 0;
+
+	do {
+		outs = devm_kasprintf(&orig->dev, GFP_KERNEL,
+				      "out:%d", conn->src_port);
+		if (!outs)
+			break;
+		ins = devm_kasprintf(&target->dev, GFP_KERNEL,
+				     "in:%d", conn->dest_port);
+		if (!ins)
+			break;
+		link = devm_kzalloc(&orig->dev,
+				    sizeof(struct coresight_sysfs_link),
+				    GFP_KERNEL);
+		if (!link)
+			break;
+
+		link->orig = orig;
+		link->target = target;
+		link->orig_name = outs;
+		link->target_name = ins;
+
+		ret = coresight_add_sysfs_link(link);
+		if (ret)
+			break;
+
+		conn->link = link;
+		return 0;
+	} while (0);
+
+	return ret;
+}
+
+/*
+ * coresight_remove_links: Remove the sysfs links for a given connection @conn,
+ * from @orig device to @target device. See coresight_make_links() for more
+ * details.
+ */
+void coresight_remove_links(struct coresight_device *orig,
+			    struct coresight_connection *conn)
+{
+	if (!orig || !conn->link)
+		return;
+
+	coresight_remove_sysfs_link(conn->link);
+
+	devm_kfree(&conn->dest_dev->dev, conn->link->target_name);
+	devm_kfree(&orig->dev, conn->link->orig_name);
+	devm_kfree(&orig->dev, conn->link);
+	conn->link = NULL;
+}
diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c
new file mode 100644
index 0000000000..c106d142e6
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc-core.c
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Trace Memory Controller driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/property.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+
+#include "coresight-priv.h"
+#include "coresight-tmc.h"
+
+DEFINE_CORESIGHT_DEVLIST(etb_devs, "tmc_etb");
+DEFINE_CORESIGHT_DEVLIST(etf_devs, "tmc_etf");
+DEFINE_CORESIGHT_DEVLIST(etr_devs, "tmc_etr");
+
+int tmc_wait_for_tmcready(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *csdev = drvdata->csdev;
+	struct csdev_access *csa = &csdev->access;
+
+	/* Ensure formatter, unformatter and hardware fifo are empty */
+	if (coresight_timeout(csa, TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
+		dev_err(&csdev->dev,
+			"timeout while waiting for TMC to be Ready\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+void tmc_flush_and_stop(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *csdev = drvdata->csdev;
+	struct csdev_access *csa = &csdev->access;
+	u32 ffcr;
+
+	ffcr = readl_relaxed(drvdata->base + TMC_FFCR);
+	ffcr |= TMC_FFCR_STOP_ON_FLUSH;
+	writel_relaxed(ffcr, drvdata->base + TMC_FFCR);
+	ffcr |= BIT(TMC_FFCR_FLUSHMAN_BIT);
+	writel_relaxed(ffcr, drvdata->base + TMC_FFCR);
+	/* Ensure flush completes */
+	if (coresight_timeout(csa, TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
+		dev_err(&csdev->dev,
+		"timeout while waiting for completion of Manual Flush\n");
+	}
+
+	tmc_wait_for_tmcready(drvdata);
+}
+
+void tmc_enable_hw(struct tmc_drvdata *drvdata)
+{
+	writel_relaxed(TMC_CTL_CAPT_EN, drvdata->base + TMC_CTL);
+}
+
+void tmc_disable_hw(struct tmc_drvdata *drvdata)
+{
+	writel_relaxed(0x0, drvdata->base + TMC_CTL);
+}
+
+u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata)
+{
+	u32 mask = 0;
+
+	/*
+	 * When moving RRP or an offset address forward, the new values must
+	 * be byte-address aligned to the width of the trace memory databus
+	 * _and_ to a frame boundary (16 byte), whichever is the biggest. For
+	 * example, for 32-bit, 64-bit and 128-bit wide trace memory, the four
+	 * LSBs must be 0s. For 256-bit wide trace memory, the five LSBs must
+	 * be 0s.
+	 */
+	switch (drvdata->memwidth) {
+	case TMC_MEM_INTF_WIDTH_32BITS:
+	case TMC_MEM_INTF_WIDTH_64BITS:
+	case TMC_MEM_INTF_WIDTH_128BITS:
+		mask = GENMASK(31, 4);
+		break;
+	case TMC_MEM_INTF_WIDTH_256BITS:
+		mask = GENMASK(31, 5);
+		break;
+	}
+
+	return mask;
+}
+
+static int tmc_read_prepare(struct tmc_drvdata *drvdata)
+{
+	int ret = 0;
+
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+	case TMC_CONFIG_TYPE_ETF:
+		ret = tmc_read_prepare_etb(drvdata);
+		break;
+	case TMC_CONFIG_TYPE_ETR:
+		ret = tmc_read_prepare_etr(drvdata);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (!ret)
+		dev_dbg(&drvdata->csdev->dev, "TMC read start\n");
+
+	return ret;
+}
+
+static int tmc_read_unprepare(struct tmc_drvdata *drvdata)
+{
+	int ret = 0;
+
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+	case TMC_CONFIG_TYPE_ETF:
+		ret = tmc_read_unprepare_etb(drvdata);
+		break;
+	case TMC_CONFIG_TYPE_ETR:
+		ret = tmc_read_unprepare_etr(drvdata);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (!ret)
+		dev_dbg(&drvdata->csdev->dev, "TMC read end\n");
+
+	return ret;
+}
+
+static int tmc_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+
+	ret = tmc_read_prepare(drvdata);
+	if (ret)
+		return ret;
+
+	nonseekable_open(inode, file);
+
+	dev_dbg(&drvdata->csdev->dev, "%s: successfully opened\n", __func__);
+	return 0;
+}
+
+static inline ssize_t tmc_get_sysfs_trace(struct tmc_drvdata *drvdata,
+					  loff_t pos, size_t len, char **bufpp)
+{
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+	case TMC_CONFIG_TYPE_ETF:
+		return tmc_etb_get_sysfs_trace(drvdata, pos, len, bufpp);
+	case TMC_CONFIG_TYPE_ETR:
+		return tmc_etr_get_sysfs_trace(drvdata, pos, len, bufpp);
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t tmc_read(struct file *file, char __user *data, size_t len,
+			loff_t *ppos)
+{
+	char *bufp;
+	ssize_t actual;
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+	actual = tmc_get_sysfs_trace(drvdata, *ppos, len, &bufp);
+	if (actual <= 0)
+		return 0;
+
+	if (copy_to_user(data, bufp, actual)) {
+		dev_dbg(&drvdata->csdev->dev,
+			"%s: copy_to_user failed\n", __func__);
+		return -EFAULT;
+	}
+
+	*ppos += actual;
+	dev_dbg(&drvdata->csdev->dev, "%zu bytes copied\n", actual);
+
+	return actual;
+}
+
+static int tmc_release(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+
+	ret = tmc_read_unprepare(drvdata);
+	if (ret)
+		return ret;
+
+	dev_dbg(&drvdata->csdev->dev, "%s: released\n", __func__);
+	return 0;
+}
+
+static const struct file_operations tmc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= tmc_open,
+	.read		= tmc_read,
+	.release	= tmc_release,
+	.llseek		= no_llseek,
+};
+
+static enum tmc_mem_intf_width tmc_get_memwidth(u32 devid)
+{
+	enum tmc_mem_intf_width memwidth;
+
+	/*
+	 * Excerpt from the TRM:
+	 *
+	 * DEVID::MEMWIDTH[10:8]
+	 * 0x2 Memory interface databus is 32 bits wide.
+	 * 0x3 Memory interface databus is 64 bits wide.
+	 * 0x4 Memory interface databus is 128 bits wide.
+	 * 0x5 Memory interface databus is 256 bits wide.
+	 */
+	switch (BMVAL(devid, 8, 10)) {
+	case 0x2:
+		memwidth = TMC_MEM_INTF_WIDTH_32BITS;
+		break;
+	case 0x3:
+		memwidth = TMC_MEM_INTF_WIDTH_64BITS;
+		break;
+	case 0x4:
+		memwidth = TMC_MEM_INTF_WIDTH_128BITS;
+		break;
+	case 0x5:
+		memwidth = TMC_MEM_INTF_WIDTH_256BITS;
+		break;
+	default:
+		memwidth = 0;
+	}
+
+	return memwidth;
+}
+
+static struct attribute *coresight_tmc_mgmt_attrs[] = {
+	coresight_simple_reg32(rsz, TMC_RSZ),
+	coresight_simple_reg32(sts, TMC_STS),
+	coresight_simple_reg64(rrp, TMC_RRP, TMC_RRPHI),
+	coresight_simple_reg64(rwp, TMC_RWP, TMC_RWPHI),
+	coresight_simple_reg32(trg, TMC_TRG),
+	coresight_simple_reg32(ctl, TMC_CTL),
+	coresight_simple_reg32(ffsr, TMC_FFSR),
+	coresight_simple_reg32(ffcr, TMC_FFCR),
+	coresight_simple_reg32(mode, TMC_MODE),
+	coresight_simple_reg32(pscr, TMC_PSCR),
+	coresight_simple_reg32(devid, CORESIGHT_DEVID),
+	coresight_simple_reg64(dba, TMC_DBALO, TMC_DBAHI),
+	coresight_simple_reg32(axictl, TMC_AXICTL),
+	coresight_simple_reg32(authstatus, TMC_AUTHSTATUS),
+	NULL,
+};
+
+static ssize_t trigger_cntr_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->trigger_cntr;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_cntr_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->trigger_cntr = val;
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_cntr);
+
+static ssize_t buffer_size_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	return sprintf(buf, "%#x\n", drvdata->size);
+}
+
+static ssize_t buffer_size_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	/* Only permitted for TMC-ETRs */
+	if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
+		return -EPERM;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+	/* The buffer size should be page aligned */
+	if (val & (PAGE_SIZE - 1))
+		return -EINVAL;
+	drvdata->size = val;
+	return size;
+}
+
+static DEVICE_ATTR_RW(buffer_size);
+
+static struct attribute *coresight_tmc_attrs[] = {
+	&dev_attr_trigger_cntr.attr,
+	&dev_attr_buffer_size.attr,
+	NULL,
+};
+
+static const struct attribute_group coresight_tmc_group = {
+	.attrs = coresight_tmc_attrs,
+};
+
+static const struct attribute_group coresight_tmc_mgmt_group = {
+	.attrs = coresight_tmc_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *coresight_tmc_groups[] = {
+	&coresight_tmc_group,
+	&coresight_tmc_mgmt_group,
+	NULL,
+};
+
+static inline bool tmc_etr_can_use_sg(struct device *dev)
+{
+	return fwnode_property_present(dev->fwnode, "arm,scatter-gather");
+}
+
+static inline bool tmc_etr_has_non_secure_access(struct tmc_drvdata *drvdata)
+{
+	u32 auth = readl_relaxed(drvdata->base + TMC_AUTHSTATUS);
+
+	return (auth & TMC_AUTH_NSID_MASK) == 0x3;
+}
+
+/* Detect and initialise the capabilities of a TMC ETR */
+static int tmc_etr_setup_caps(struct device *parent, u32 devid, void *dev_caps)
+{
+	int rc;
+	u32 dma_mask = 0;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(parent);
+
+	if (!tmc_etr_has_non_secure_access(drvdata))
+		return -EACCES;
+
+	/* Set the unadvertised capabilities */
+	tmc_etr_init_caps(drvdata, (u32)(unsigned long)dev_caps);
+
+	if (!(devid & TMC_DEVID_NOSCAT) && tmc_etr_can_use_sg(parent))
+		tmc_etr_set_cap(drvdata, TMC_ETR_SG);
+
+	/* Check if the AXI address width is available */
+	if (devid & TMC_DEVID_AXIAW_VALID)
+		dma_mask = ((devid >> TMC_DEVID_AXIAW_SHIFT) &
+				TMC_DEVID_AXIAW_MASK);
+
+	/*
+	 * Unless specified in the device configuration, ETR uses a 40-bit
+	 * AXI master in place of the embedded SRAM of ETB/ETF.
+	 */
+	switch (dma_mask) {
+	case 32:
+	case 40:
+	case 44:
+	case 48:
+	case 52:
+		dev_info(parent, "Detected dma mask %dbits\n", dma_mask);
+		break;
+	default:
+		dma_mask = 40;
+	}
+
+	rc = dma_set_mask_and_coherent(parent, DMA_BIT_MASK(dma_mask));
+	if (rc)
+		dev_err(parent, "Failed to setup DMA mask: %d\n", rc);
+	return rc;
+}
+
+static u32 tmc_etr_get_default_buffer_size(struct device *dev)
+{
+	u32 size;
+
+	if (fwnode_property_read_u32(dev->fwnode, "arm,buffer-size", &size))
+		size = SZ_1M;
+	return size;
+}
+
+static u32 tmc_etr_get_max_burst_size(struct device *dev)
+{
+	u32 burst_size;
+
+	if (fwnode_property_read_u32(dev->fwnode, "arm,max-burst-size",
+				     &burst_size))
+		return TMC_AXICTL_WR_BURST_16;
+
+	/* Only permissible values are 0 to 15 */
+	if (burst_size > 0xF)
+		burst_size = TMC_AXICTL_WR_BURST_16;
+
+	return burst_size;
+}
+
+static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret = 0;
+	u32 devid;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct tmc_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+	struct coresight_dev_list *dev_list = NULL;
+
+	ret = -ENOMEM;
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		goto out;
+
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
+		goto out;
+	}
+
+	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
+
+	spin_lock_init(&drvdata->spinlock);
+
+	devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID);
+	drvdata->config_type = BMVAL(devid, 6, 7);
+	drvdata->memwidth = tmc_get_memwidth(devid);
+	/* This device is not associated with a session */
+	drvdata->pid = -1;
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		drvdata->size = tmc_etr_get_default_buffer_size(dev);
+		drvdata->max_burst_size = tmc_etr_get_max_burst_size(dev);
+	} else {
+		drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4;
+	}
+
+	desc.dev = dev;
+	desc.groups = coresight_tmc_groups;
+
+	switch (drvdata->config_type) {
+	case TMC_CONFIG_TYPE_ETB:
+		desc.type = CORESIGHT_DEV_TYPE_SINK;
+		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+		desc.ops = &tmc_etb_cs_ops;
+		dev_list = &etb_devs;
+		break;
+	case TMC_CONFIG_TYPE_ETR:
+		desc.type = CORESIGHT_DEV_TYPE_SINK;
+		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_SYSMEM;
+		desc.ops = &tmc_etr_cs_ops;
+		ret = tmc_etr_setup_caps(dev, devid,
+					 coresight_get_uci_data(id));
+		if (ret)
+			goto out;
+		idr_init(&drvdata->idr);
+		mutex_init(&drvdata->idr_mutex);
+		dev_list = &etr_devs;
+		break;
+	case TMC_CONFIG_TYPE_ETF:
+		desc.type = CORESIGHT_DEV_TYPE_LINKSINK;
+		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+		desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
+		desc.ops = &tmc_etf_cs_ops;
+		dev_list = &etf_devs;
+		break;
+	default:
+		pr_err("%s: Unsupported TMC config\n", desc.name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	desc.name = coresight_alloc_device_name(dev_list, dev);
+	if (!desc.name) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto out;
+	}
+	adev->dev.platform_data = pdata;
+	desc.pdata = pdata;
+
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto out;
+	}
+
+	drvdata->miscdev.name = desc.name;
+	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
+	drvdata->miscdev.fops = &tmc_fops;
+	ret = misc_register(&drvdata->miscdev);
+	if (ret)
+		coresight_unregister(drvdata->csdev);
+	else
+		pm_runtime_put(&adev->dev);
+out:
+	return ret;
+}
+
+static void tmc_shutdown(struct amba_device *adev)
+{
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	if (drvdata->mode == CS_MODE_DISABLED)
+		goto out;
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR)
+		tmc_etr_disable_hw(drvdata);
+
+	/*
+	 * We do not care about coresight unregister here unlike remove
+	 * callback which is required for making coresight modular since
+	 * the system is going down after this.
+	 */
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+}
+
+static void tmc_remove(struct amba_device *adev)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	/*
+	 * Since misc_open() holds a refcount on the f_ops, which is
+	 * etb fops in this case, device is there until last file
+	 * handler to this device is closed.
+	 */
+	misc_deregister(&drvdata->miscdev);
+	coresight_unregister(drvdata->csdev);
+}
+
+static const struct amba_id tmc_ids[] = {
+	CS_AMBA_ID(0x000bb961),
+	/* Coresight SoC 600 TMC-ETR/ETS */
+	CS_AMBA_ID_DATA(0x000bb9e8, (unsigned long)CORESIGHT_SOC_600_ETR_CAPS),
+	/* Coresight SoC 600 TMC-ETB */
+	CS_AMBA_ID(0x000bb9e9),
+	/* Coresight SoC 600 TMC-ETF */
+	CS_AMBA_ID(0x000bb9ea),
+	{ 0, 0},
+};
+
+MODULE_DEVICE_TABLE(amba, tmc_ids);
+
+static struct amba_driver tmc_driver = {
+	.drv = {
+		.name   = "coresight-tmc",
+		.owner  = THIS_MODULE,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= tmc_probe,
+	.shutdown	= tmc_shutdown,
+	.remove		= tmc_remove,
+	.id_table	= tmc_ids,
+};
+
+module_amba_driver(tmc_driver);
+
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_DESCRIPTION("Arm CoreSight Trace Memory Controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
new file mode 100644
index 0000000000..7406b65e2c
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2016 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/atomic.h>
+#include <linux/circ_buf.h>
+#include <linux/coresight.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include "coresight-priv.h"
+#include "coresight-tmc.h"
+#include "coresight-etm-perf.h"
+
+static int tmc_set_etf_buffer(struct coresight_device *csdev,
+			      struct perf_output_handle *handle);
+
+static int __tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+{
+	int rc = 0;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Wait for TMCSReady bit to be set */
+	rc = tmc_wait_for_tmcready(drvdata);
+	if (rc) {
+		dev_err(&drvdata->csdev->dev,
+			"Failed to enable: TMC not ready\n");
+		CS_LOCK(drvdata->base);
+		return rc;
+	}
+
+	writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE);
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI |
+		       TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT |
+		       TMC_FFCR_TRIGON_TRIGIN,
+		       drvdata->base + TMC_FFCR);
+
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+	return rc;
+}
+
+static int tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+{
+	int rc = coresight_claim_device(drvdata->csdev);
+
+	if (rc)
+		return rc;
+
+	rc = __tmc_etb_enable_hw(drvdata);
+	if (rc)
+		coresight_disclaim_device(drvdata->csdev);
+	return rc;
+}
+
+static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata)
+{
+	char *bufp;
+	u32 read_data, lost;
+
+	/* Check if the buffer wrapped around. */
+	lost = readl_relaxed(drvdata->base + TMC_STS) & TMC_STS_FULL;
+	bufp = drvdata->buf;
+	drvdata->len = 0;
+	while (1) {
+		read_data = readl_relaxed(drvdata->base + TMC_RRD);
+		if (read_data == 0xFFFFFFFF)
+			break;
+		memcpy(bufp, &read_data, 4);
+		bufp += 4;
+		drvdata->len += 4;
+	}
+
+	if (lost)
+		coresight_insert_barrier_packet(drvdata->buf);
+	return;
+}
+
+static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	/*
+	 * When operating in sysFS mode the content of the buffer needs to be
+	 * read before the TMC is disabled.
+	 */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		tmc_etb_dump_hw(drvdata);
+	tmc_disable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+{
+	__tmc_etb_disable_hw(drvdata);
+	coresight_disclaim_device(drvdata->csdev);
+}
+
+static int __tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
+{
+	int rc = 0;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Wait for TMCSReady bit to be set */
+	rc = tmc_wait_for_tmcready(drvdata);
+	if (rc) {
+		dev_err(&drvdata->csdev->dev,
+			"Failed to enable : TMC is not ready\n");
+		CS_LOCK(drvdata->base);
+		return rc;
+	}
+
+	writel_relaxed(TMC_MODE_HARDWARE_FIFO, drvdata->base + TMC_MODE);
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI,
+		       drvdata->base + TMC_FFCR);
+	writel_relaxed(0x0, drvdata->base + TMC_BUFWM);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+	return rc;
+}
+
+static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
+{
+	int rc = coresight_claim_device(drvdata->csdev);
+
+	if (rc)
+		return rc;
+
+	rc = __tmc_etf_enable_hw(drvdata);
+	if (rc)
+		coresight_disclaim_device(drvdata->csdev);
+	return rc;
+}
+
+static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *csdev = drvdata->csdev;
+
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	tmc_disable_hw(drvdata);
+	coresight_disclaim_device_unlocked(csdev);
+	CS_LOCK(drvdata->base);
+}
+
+/*
+ * Return the available trace data in the buffer from @pos, with
+ * a maximum limit of @len, updating the @bufpp on where to
+ * find it.
+ */
+ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata,
+				loff_t pos, size_t len, char **bufpp)
+{
+	ssize_t actual = len;
+
+	/* Adjust the len to available size @pos */
+	if (pos + actual > drvdata->len)
+		actual = drvdata->len - pos;
+	if (actual > 0)
+		*bufpp = drvdata->buf + pos;
+	return actual;
+}
+
+static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev)
+{
+	int ret = 0;
+	bool used = false;
+	char *buf = NULL;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * If we don't have a buffer release the lock and allocate memory.
+	 * Otherwise keep the lock and move along.
+	 */
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (!drvdata->buf) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+		/* Allocating the memory here while outside of the spinlock */
+		buf = kzalloc(drvdata->size, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+
+		/* Let's try again */
+		spin_lock_irqsave(&drvdata->spinlock, flags);
+	}
+
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * In sysFS mode we can have multiple writers per sink.  Since this
+	 * sink is already enabled no memory is needed and the HW need not be
+	 * touched.
+	 */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		atomic_inc(&csdev->refcnt);
+		goto out;
+	}
+
+	/*
+	 * If drvdata::buf isn't NULL, memory was allocated for a previous
+	 * trace run but wasn't read.  If so simply zero-out the memory.
+	 * Otherwise use the memory allocated above.
+	 *
+	 * The memory is freed when users read the buffer using the
+	 * /dev/xyz.{etf|etb} interface.  See tmc_read_unprepare_etf() for
+	 * details.
+	 */
+	if (drvdata->buf) {
+		memset(drvdata->buf, 0, drvdata->size);
+	} else {
+		used = true;
+		drvdata->buf = buf;
+	}
+
+	ret = tmc_etb_enable_hw(drvdata);
+	if (!ret) {
+		drvdata->mode = CS_MODE_SYSFS;
+		atomic_inc(&csdev->refcnt);
+	} else {
+		/* Free up the buffer if we failed to enable */
+		used = false;
+	}
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	/* Free memory outside the spinlock if need be */
+	if (!used)
+		kfree(buf);
+
+	return ret;
+}
+
+static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data)
+{
+	int ret = 0;
+	pid_t pid;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	do {
+		ret = -EINVAL;
+		if (drvdata->reading)
+			break;
+		/*
+		 * No need to continue if the ETB/ETF is already operated
+		 * from sysFS.
+		 */
+		if (drvdata->mode == CS_MODE_SYSFS) {
+			ret = -EBUSY;
+			break;
+		}
+
+		/* Get a handle on the pid of the process to monitor */
+		pid = buf->pid;
+
+		if (drvdata->pid != -1 && drvdata->pid != pid) {
+			ret = -EBUSY;
+			break;
+		}
+
+		ret = tmc_set_etf_buffer(csdev, handle);
+		if (ret)
+			break;
+
+		/*
+		 * No HW configuration is needed if the sink is already in
+		 * use for this session.
+		 */
+		if (drvdata->pid == pid) {
+			atomic_inc(&csdev->refcnt);
+			break;
+		}
+
+		ret  = tmc_etb_enable_hw(drvdata);
+		if (!ret) {
+			/* Associate with monitored process. */
+			drvdata->pid = pid;
+			drvdata->mode = CS_MODE_PERF;
+			atomic_inc(&csdev->refcnt);
+		}
+	} while (0);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return ret;
+}
+
+static int tmc_enable_etf_sink(struct coresight_device *csdev,
+			       enum cs_mode mode, void *data)
+{
+	int ret;
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		ret = tmc_enable_etf_sink_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = tmc_enable_etf_sink_perf(csdev, data);
+		break;
+	/* We shouldn't be here */
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		return ret;
+
+	dev_dbg(&csdev->dev, "TMC-ETB/ETF enabled\n");
+	return 0;
+}
+
+static int tmc_disable_etf_sink(struct coresight_device *csdev)
+{
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
+	}
+
+	if (atomic_dec_return(&csdev->refcnt)) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
+	}
+
+	/* Complain if we (somehow) got out of sync */
+	WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED);
+	tmc_etb_disable_hw(drvdata);
+	/* Dissociate from monitored process. */
+	drvdata->pid = -1;
+	drvdata->mode = CS_MODE_DISABLED;
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_dbg(&csdev->dev, "TMC-ETB/ETF disabled\n");
+	return 0;
+}
+
+static int tmc_enable_etf_link(struct coresight_device *csdev,
+			       struct coresight_connection *in,
+			       struct coresight_connection *out)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	bool first_enable = false;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
+	}
+
+	if (atomic_read(&csdev->refcnt) == 0) {
+		ret = tmc_etf_enable_hw(drvdata);
+		if (!ret) {
+			drvdata->mode = CS_MODE_SYSFS;
+			first_enable = true;
+		}
+	}
+	if (!ret)
+		atomic_inc(&csdev->refcnt);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	if (first_enable)
+		dev_dbg(&csdev->dev, "TMC-ETF enabled\n");
+	return ret;
+}
+
+static void tmc_disable_etf_link(struct coresight_device *csdev,
+				 struct coresight_connection *in,
+				 struct coresight_connection *out)
+{
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	bool last_disable = false;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return;
+	}
+
+	if (atomic_dec_return(&csdev->refcnt) == 0) {
+		tmc_etf_disable_hw(drvdata);
+		drvdata->mode = CS_MODE_DISABLED;
+		last_disable = true;
+	}
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	if (last_disable)
+		dev_dbg(&csdev->dev, "TMC-ETF disabled\n");
+}
+
+static void *tmc_alloc_etf_buffer(struct coresight_device *csdev,
+				  struct perf_event *event, void **pages,
+				  int nr_pages, bool overwrite)
+{
+	int node;
+	struct cs_buffers *buf;
+
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+
+	/* Allocate memory structure for interaction with Perf */
+	buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->pid = task_pid_nr(event->owner);
+	buf->snapshot = overwrite;
+	buf->nr_pages = nr_pages;
+	buf->data_pages = pages;
+
+	return buf;
+}
+
+static void tmc_free_etf_buffer(void *config)
+{
+	struct cs_buffers *buf = config;
+
+	kfree(buf);
+}
+
+static int tmc_set_etf_buffer(struct coresight_device *csdev,
+			      struct perf_output_handle *handle)
+{
+	int ret = 0;
+	unsigned long head;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	if (!buf)
+		return -EINVAL;
+
+	/* wrap head around to the amount of space we have */
+	head = handle->head & (((unsigned long)buf->nr_pages << PAGE_SHIFT) - 1);
+
+	/* find the page to write to */
+	buf->cur = head / PAGE_SIZE;
+
+	/* and offset within that page */
+	buf->offset = head % PAGE_SIZE;
+
+	local_set(&buf->data_size, 0);
+
+	return ret;
+}
+
+static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev,
+				  struct perf_output_handle *handle,
+				  void *sink_config)
+{
+	bool lost = false;
+	int i, cur;
+	const u32 *barrier;
+	u32 *buf_ptr;
+	u64 read_ptr, write_ptr;
+	u32 status;
+	unsigned long offset, to_read = 0, flags;
+	struct cs_buffers *buf = sink_config;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (!buf)
+		return 0;
+
+	/* This shouldn't happen */
+	if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF))
+		return 0;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't do anything if another tracer is using this sink */
+	if (atomic_read(&csdev->refcnt) != 1)
+		goto out;
+
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+
+	read_ptr = tmc_read_rrp(drvdata);
+	write_ptr = tmc_read_rwp(drvdata);
+
+	/*
+	 * Get a hold of the status register and see if a wrap around
+	 * has occurred.  If so adjust things accordingly.
+	 */
+	status = readl_relaxed(drvdata->base + TMC_STS);
+	if (status & TMC_STS_FULL) {
+		lost = true;
+		to_read = drvdata->size;
+	} else {
+		to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size);
+	}
+
+	/*
+	 * The TMC RAM buffer may be bigger than the space available in the
+	 * perf ring buffer (handle->size).  If so advance the RRP so that we
+	 * get the latest trace data.  In snapshot mode none of that matters
+	 * since we are expected to clobber stale data in favour of the latest
+	 * traces.
+	 */
+	if (!buf->snapshot && to_read > handle->size) {
+		u32 mask = tmc_get_memwidth_mask(drvdata);
+
+		/*
+		 * Make sure the new size is aligned in accordance with the
+		 * requirement explained in function tmc_get_memwidth_mask().
+		 */
+		to_read = handle->size & mask;
+		/* Move the RAM read pointer up */
+		read_ptr = (write_ptr + drvdata->size) - to_read;
+		/* Make sure we are still within our limits */
+		if (read_ptr > (drvdata->size - 1))
+			read_ptr -= drvdata->size;
+		/* Tell the HW */
+		tmc_write_rrp(drvdata, read_ptr);
+		lost = true;
+	}
+
+	/*
+	 * Don't set the TRUNCATED flag in snapshot mode because 1) the
+	 * captured buffer is expected to be truncated and 2) a full buffer
+	 * prevents the event from being re-enabled by the perf core,
+	 * resulting in stale data being send to user space.
+	 */
+	if (!buf->snapshot && lost)
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+
+	cur = buf->cur;
+	offset = buf->offset;
+	barrier = coresight_barrier_pkt;
+
+	/* for every byte to read */
+	for (i = 0; i < to_read; i += 4) {
+		buf_ptr = buf->data_pages[cur] + offset;
+		*buf_ptr = readl_relaxed(drvdata->base + TMC_RRD);
+
+		if (lost && i < CORESIGHT_BARRIER_PKT_SIZE) {
+			*buf_ptr = *barrier;
+			barrier++;
+		}
+
+		offset += 4;
+		if (offset >= PAGE_SIZE) {
+			offset = 0;
+			cur++;
+			/* wrap around at the end of the buffer */
+			cur &= buf->nr_pages - 1;
+		}
+	}
+
+	/*
+	 * In snapshot mode we simply increment the head by the number of byte
+	 * that were written.  User space will figure out how many bytes to get
+	 * from the AUX buffer based on the position of the head.
+	 */
+	if (buf->snapshot)
+		handle->head += to_read;
+
+	/*
+	 * CS_LOCK() contains mb() so it can ensure visibility of the AUX trace
+	 * data before the aux_head is updated via perf_aux_output_end(), which
+	 * is expected by the perf ring buffer.
+	 */
+	CS_LOCK(drvdata->base);
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return to_read;
+}
+
+static const struct coresight_ops_sink tmc_etf_sink_ops = {
+	.enable		= tmc_enable_etf_sink,
+	.disable	= tmc_disable_etf_sink,
+	.alloc_buffer	= tmc_alloc_etf_buffer,
+	.free_buffer	= tmc_free_etf_buffer,
+	.update_buffer	= tmc_update_etf_buffer,
+};
+
+static const struct coresight_ops_link tmc_etf_link_ops = {
+	.enable		= tmc_enable_etf_link,
+	.disable	= tmc_disable_etf_link,
+};
+
+const struct coresight_ops tmc_etb_cs_ops = {
+	.sink_ops	= &tmc_etf_sink_ops,
+};
+
+const struct coresight_ops tmc_etf_cs_ops = {
+	.sink_ops	= &tmc_etf_sink_ops,
+	.link_ops	= &tmc_etf_link_ops,
+};
+
+int tmc_read_prepare_etb(struct tmc_drvdata *drvdata)
+{
+	enum tmc_mode mode;
+	int ret = 0;
+	unsigned long flags;
+
+	/* config types are set a boot time and never change */
+	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETB &&
+			 drvdata->config_type != TMC_CONFIG_TYPE_ETF))
+		return -EINVAL;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Don't interfere if operated from Perf */
+	if (drvdata->mode == CS_MODE_PERF) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* If drvdata::buf is NULL the trace data has been read already */
+	if (drvdata->buf == NULL) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Disable the TMC if need be */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		/* There is no point in reading a TMC in HW FIFO mode */
+		mode = readl_relaxed(drvdata->base + TMC_MODE);
+		if (mode != TMC_MODE_CIRCULAR_BUFFER) {
+			ret = -EINVAL;
+			goto out;
+		}
+		__tmc_etb_disable_hw(drvdata);
+	}
+
+	drvdata->reading = true;
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return ret;
+}
+
+int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata)
+{
+	char *buf = NULL;
+	enum tmc_mode mode;
+	unsigned long flags;
+	int rc = 0;
+
+	/* config types are set a boot time and never change */
+	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETB &&
+			 drvdata->config_type != TMC_CONFIG_TYPE_ETF))
+		return -EINVAL;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Re-enable the TMC if need be */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		/* There is no point in reading a TMC in HW FIFO mode */
+		mode = readl_relaxed(drvdata->base + TMC_MODE);
+		if (mode != TMC_MODE_CIRCULAR_BUFFER) {
+			spin_unlock_irqrestore(&drvdata->spinlock, flags);
+			return -EINVAL;
+		}
+		/*
+		 * The trace run will continue with the same allocated trace
+		 * buffer. As such zero-out the buffer so that we don't end
+		 * up with stale data.
+		 *
+		 * Since the tracer is still enabled drvdata::buf
+		 * can't be NULL.
+		 */
+		memset(drvdata->buf, 0, drvdata->size);
+		rc = __tmc_etb_enable_hw(drvdata);
+		if (rc) {
+			spin_unlock_irqrestore(&drvdata->spinlock, flags);
+			return rc;
+		}
+	} else {
+		/*
+		 * The ETB/ETF is not tracing and the buffer was just read.
+		 * As such prepare to free the trace buffer.
+		 */
+		buf = drvdata->buf;
+		drvdata->buf = NULL;
+	}
+
+	drvdata->reading = false;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	/*
+	 * Free allocated memory outside of the spinlock.  There is no need
+	 * to assert the validity of 'buf' since calling kfree(NULL) is safe.
+	 */
+	kfree(buf);
+
+	return 0;
+}
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
new file mode 100644
index 0000000000..8311e1028d
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -0,0 +1,1806 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2016 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/atomic.h>
+#include <linux/coresight.h>
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/idr.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include "coresight-catu.h"
+#include "coresight-etm-perf.h"
+#include "coresight-priv.h"
+#include "coresight-tmc.h"
+
+struct etr_flat_buf {
+	struct device	*dev;
+	dma_addr_t	daddr;
+	void		*vaddr;
+	size_t		size;
+};
+
+/*
+ * etr_perf_buffer - Perf buffer used for ETR
+ * @drvdata		- The ETR drvdaga this buffer has been allocated for.
+ * @etr_buf		- Actual buffer used by the ETR
+ * @pid			- The PID this etr_perf_buffer belongs to.
+ * @snaphost		- Perf session mode
+ * @nr_pages		- Number of pages in the ring buffer.
+ * @pages		- Array of Pages in the ring buffer.
+ */
+struct etr_perf_buffer {
+	struct tmc_drvdata	*drvdata;
+	struct etr_buf		*etr_buf;
+	pid_t			pid;
+	bool			snapshot;
+	int			nr_pages;
+	void			**pages;
+};
+
+/* Convert the perf index to an offset within the ETR buffer */
+#define PERF_IDX2OFF(idx, buf)		\
+		((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))
+
+/* Lower limit for ETR hardware buffer */
+#define TMC_ETR_PERF_MIN_BUF_SIZE	SZ_1M
+
+/*
+ * The TMC ETR SG has a page size of 4K. The SG table contains pointers
+ * to 4KB buffers. However, the OS may use a PAGE_SIZE different from
+ * 4K (i.e, 16KB or 64KB). This implies that a single OS page could
+ * contain more than one SG buffer and tables.
+ *
+ * A table entry has the following format:
+ *
+ * ---Bit31------------Bit4-------Bit1-----Bit0--
+ * |     Address[39:12]    | SBZ |  Entry Type  |
+ * ----------------------------------------------
+ *
+ * Address: Bits [39:12] of a physical page address. Bits [11:0] are
+ *	    always zero.
+ *
+ * Entry type:
+ *	b00 - Reserved.
+ *	b01 - Last entry in the tables, points to 4K page buffer.
+ *	b10 - Normal entry, points to 4K page buffer.
+ *	b11 - Link. The address points to the base of next table.
+ */
+
+typedef u32 sgte_t;
+
+#define ETR_SG_PAGE_SHIFT		12
+#define ETR_SG_PAGE_SIZE		(1UL << ETR_SG_PAGE_SHIFT)
+#define ETR_SG_PAGES_PER_SYSPAGE	(PAGE_SIZE / ETR_SG_PAGE_SIZE)
+#define ETR_SG_PTRS_PER_PAGE		(ETR_SG_PAGE_SIZE / sizeof(sgte_t))
+#define ETR_SG_PTRS_PER_SYSPAGE		(PAGE_SIZE / sizeof(sgte_t))
+
+#define ETR_SG_ET_MASK			0x3
+#define ETR_SG_ET_LAST			0x1
+#define ETR_SG_ET_NORMAL		0x2
+#define ETR_SG_ET_LINK			0x3
+
+#define ETR_SG_ADDR_SHIFT		4
+
+#define ETR_SG_ENTRY(addr, type) \
+	(sgte_t)((((addr) >> ETR_SG_PAGE_SHIFT) << ETR_SG_ADDR_SHIFT) | \
+		 (type & ETR_SG_ET_MASK))
+
+#define ETR_SG_ADDR(entry) \
+	(((dma_addr_t)(entry) >> ETR_SG_ADDR_SHIFT) << ETR_SG_PAGE_SHIFT)
+#define ETR_SG_ET(entry)		((entry) & ETR_SG_ET_MASK)
+
+/*
+ * struct etr_sg_table : ETR SG Table
+ * @sg_table:		Generic SG Table holding the data/table pages.
+ * @hwaddr:		hwaddress used by the TMC, which is the base
+ *			address of the table.
+ */
+struct etr_sg_table {
+	struct tmc_sg_table	*sg_table;
+	dma_addr_t		hwaddr;
+};
+
+/*
+ * tmc_etr_sg_table_entries: Total number of table entries required to map
+ * @nr_pages system pages.
+ *
+ * We need to map @nr_pages * ETR_SG_PAGES_PER_SYSPAGE data pages.
+ * Each TMC page can map (ETR_SG_PTRS_PER_PAGE - 1) buffer pointers,
+ * with the last entry pointing to another page of table entries.
+ * If we spill over to a new page for mapping 1 entry, we could as
+ * well replace the link entry of the previous page with the last entry.
+ */
+static inline unsigned long __attribute_const__
+tmc_etr_sg_table_entries(int nr_pages)
+{
+	unsigned long nr_sgpages = nr_pages * ETR_SG_PAGES_PER_SYSPAGE;
+	unsigned long nr_sglinks = nr_sgpages / (ETR_SG_PTRS_PER_PAGE - 1);
+	/*
+	 * If we spill over to a new page for 1 entry, we could as well
+	 * make it the LAST entry in the previous page, skipping the Link
+	 * address.
+	 */
+	if (nr_sglinks && (nr_sgpages % (ETR_SG_PTRS_PER_PAGE - 1) < 2))
+		nr_sglinks--;
+	return nr_sgpages + nr_sglinks;
+}
+
+/*
+ * tmc_pages_get_offset:  Go through all the pages in the tmc_pages
+ * and map the device address @addr to an offset within the virtual
+ * contiguous buffer.
+ */
+static long
+tmc_pages_get_offset(struct tmc_pages *tmc_pages, dma_addr_t addr)
+{
+	int i;
+	dma_addr_t page_start;
+
+	for (i = 0; i < tmc_pages->nr_pages; i++) {
+		page_start = tmc_pages->daddrs[i];
+		if (addr >= page_start && addr < (page_start + PAGE_SIZE))
+			return i * PAGE_SIZE + (addr - page_start);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * tmc_pages_free : Unmap and free the pages used by tmc_pages.
+ * If the pages were not allocated in tmc_pages_alloc(), we would
+ * simply drop the refcount.
+ */
+static void tmc_pages_free(struct tmc_pages *tmc_pages,
+			   struct device *dev, enum dma_data_direction dir)
+{
+	int i;
+	struct device *real_dev = dev->parent;
+
+	for (i = 0; i < tmc_pages->nr_pages; i++) {
+		if (tmc_pages->daddrs && tmc_pages->daddrs[i])
+			dma_unmap_page(real_dev, tmc_pages->daddrs[i],
+					 PAGE_SIZE, dir);
+		if (tmc_pages->pages && tmc_pages->pages[i])
+			__free_page(tmc_pages->pages[i]);
+	}
+
+	kfree(tmc_pages->pages);
+	kfree(tmc_pages->daddrs);
+	tmc_pages->pages = NULL;
+	tmc_pages->daddrs = NULL;
+	tmc_pages->nr_pages = 0;
+}
+
+/*
+ * tmc_pages_alloc : Allocate and map pages for a given @tmc_pages.
+ * If @pages is not NULL, the list of page virtual addresses are
+ * used as the data pages. The pages are then dma_map'ed for @dev
+ * with dma_direction @dir.
+ *
+ * Returns 0 upon success, else the error number.
+ */
+static int tmc_pages_alloc(struct tmc_pages *tmc_pages,
+			   struct device *dev, int node,
+			   enum dma_data_direction dir, void **pages)
+{
+	int i, nr_pages;
+	dma_addr_t paddr;
+	struct page *page;
+	struct device *real_dev = dev->parent;
+
+	nr_pages = tmc_pages->nr_pages;
+	tmc_pages->daddrs = kcalloc(nr_pages, sizeof(*tmc_pages->daddrs),
+					 GFP_KERNEL);
+	if (!tmc_pages->daddrs)
+		return -ENOMEM;
+	tmc_pages->pages = kcalloc(nr_pages, sizeof(*tmc_pages->pages),
+					 GFP_KERNEL);
+	if (!tmc_pages->pages) {
+		kfree(tmc_pages->daddrs);
+		tmc_pages->daddrs = NULL;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		if (pages && pages[i]) {
+			page = virt_to_page(pages[i]);
+			/* Hold a refcount on the page */
+			get_page(page);
+		} else {
+			page = alloc_pages_node(node,
+						GFP_KERNEL | __GFP_ZERO, 0);
+			if (!page)
+				goto err;
+		}
+		paddr = dma_map_page(real_dev, page, 0, PAGE_SIZE, dir);
+		if (dma_mapping_error(real_dev, paddr))
+			goto err;
+		tmc_pages->daddrs[i] = paddr;
+		tmc_pages->pages[i] = page;
+	}
+	return 0;
+err:
+	tmc_pages_free(tmc_pages, dev, dir);
+	return -ENOMEM;
+}
+
+static inline long
+tmc_sg_get_data_page_offset(struct tmc_sg_table *sg_table, dma_addr_t addr)
+{
+	return tmc_pages_get_offset(&sg_table->data_pages, addr);
+}
+
+static inline void tmc_free_table_pages(struct tmc_sg_table *sg_table)
+{
+	if (sg_table->table_vaddr)
+		vunmap(sg_table->table_vaddr);
+	tmc_pages_free(&sg_table->table_pages, sg_table->dev, DMA_TO_DEVICE);
+}
+
+static void tmc_free_data_pages(struct tmc_sg_table *sg_table)
+{
+	if (sg_table->data_vaddr)
+		vunmap(sg_table->data_vaddr);
+	tmc_pages_free(&sg_table->data_pages, sg_table->dev, DMA_FROM_DEVICE);
+}
+
+void tmc_free_sg_table(struct tmc_sg_table *sg_table)
+{
+	tmc_free_table_pages(sg_table);
+	tmc_free_data_pages(sg_table);
+}
+EXPORT_SYMBOL_GPL(tmc_free_sg_table);
+
+/*
+ * Alloc pages for the table. Since this will be used by the device,
+ * allocate the pages closer to the device (i.e, dev_to_node(dev)
+ * rather than the CPU node).
+ */
+static int tmc_alloc_table_pages(struct tmc_sg_table *sg_table)
+{
+	int rc;
+	struct tmc_pages *table_pages = &sg_table->table_pages;
+
+	rc = tmc_pages_alloc(table_pages, sg_table->dev,
+			     dev_to_node(sg_table->dev),
+			     DMA_TO_DEVICE, NULL);
+	if (rc)
+		return rc;
+	sg_table->table_vaddr = vmap(table_pages->pages,
+				     table_pages->nr_pages,
+				     VM_MAP,
+				     PAGE_KERNEL);
+	if (!sg_table->table_vaddr)
+		rc = -ENOMEM;
+	else
+		sg_table->table_daddr = table_pages->daddrs[0];
+	return rc;
+}
+
+static int tmc_alloc_data_pages(struct tmc_sg_table *sg_table, void **pages)
+{
+	int rc;
+
+	/* Allocate data pages on the node requested by the caller */
+	rc = tmc_pages_alloc(&sg_table->data_pages,
+			     sg_table->dev, sg_table->node,
+			     DMA_FROM_DEVICE, pages);
+	if (!rc) {
+		sg_table->data_vaddr = vmap(sg_table->data_pages.pages,
+					    sg_table->data_pages.nr_pages,
+					    VM_MAP,
+					    PAGE_KERNEL);
+		if (!sg_table->data_vaddr)
+			rc = -ENOMEM;
+	}
+	return rc;
+}
+
+/*
+ * tmc_alloc_sg_table: Allocate and setup dma pages for the TMC SG table
+ * and data buffers. TMC writes to the data buffers and reads from the SG
+ * Table pages.
+ *
+ * @dev		- Coresight device to which page should be DMA mapped.
+ * @node	- Numa node for mem allocations
+ * @nr_tpages	- Number of pages for the table entries.
+ * @nr_dpages	- Number of pages for Data buffer.
+ * @pages	- Optional list of virtual address of pages.
+ */
+struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev,
+					int node,
+					int nr_tpages,
+					int nr_dpages,
+					void **pages)
+{
+	long rc;
+	struct tmc_sg_table *sg_table;
+
+	sg_table = kzalloc(sizeof(*sg_table), GFP_KERNEL);
+	if (!sg_table)
+		return ERR_PTR(-ENOMEM);
+	sg_table->data_pages.nr_pages = nr_dpages;
+	sg_table->table_pages.nr_pages = nr_tpages;
+	sg_table->node = node;
+	sg_table->dev = dev;
+
+	rc  = tmc_alloc_data_pages(sg_table, pages);
+	if (!rc)
+		rc = tmc_alloc_table_pages(sg_table);
+	if (rc) {
+		tmc_free_sg_table(sg_table);
+		kfree(sg_table);
+		return ERR_PTR(rc);
+	}
+
+	return sg_table;
+}
+EXPORT_SYMBOL_GPL(tmc_alloc_sg_table);
+
+/*
+ * tmc_sg_table_sync_data_range: Sync the data buffer written
+ * by the device from @offset upto a @size bytes.
+ */
+void tmc_sg_table_sync_data_range(struct tmc_sg_table *table,
+				  u64 offset, u64 size)
+{
+	int i, index, start;
+	int npages = DIV_ROUND_UP(size, PAGE_SIZE);
+	struct device *real_dev = table->dev->parent;
+	struct tmc_pages *data = &table->data_pages;
+
+	start = offset >> PAGE_SHIFT;
+	for (i = start; i < (start + npages); i++) {
+		index = i % data->nr_pages;
+		dma_sync_single_for_cpu(real_dev, data->daddrs[index],
+					PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+}
+EXPORT_SYMBOL_GPL(tmc_sg_table_sync_data_range);
+
+/* tmc_sg_sync_table: Sync the page table */
+void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table)
+{
+	int i;
+	struct device *real_dev = sg_table->dev->parent;
+	struct tmc_pages *table_pages = &sg_table->table_pages;
+
+	for (i = 0; i < table_pages->nr_pages; i++)
+		dma_sync_single_for_device(real_dev, table_pages->daddrs[i],
+					   PAGE_SIZE, DMA_TO_DEVICE);
+}
+EXPORT_SYMBOL_GPL(tmc_sg_table_sync_table);
+
+/*
+ * tmc_sg_table_get_data: Get the buffer pointer for data @offset
+ * in the SG buffer. The @bufpp is updated to point to the buffer.
+ * Returns :
+ *	the length of linear data available at @offset.
+ *	or
+ *	<= 0 if no data is available.
+ */
+ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table,
+			      u64 offset, size_t len, char **bufpp)
+{
+	size_t size;
+	int pg_idx = offset >> PAGE_SHIFT;
+	int pg_offset = offset & (PAGE_SIZE - 1);
+	struct tmc_pages *data_pages = &sg_table->data_pages;
+
+	size = tmc_sg_table_buf_size(sg_table);
+	if (offset >= size)
+		return -EINVAL;
+
+	/* Make sure we don't go beyond the end */
+	len = (len < (size - offset)) ? len : size - offset;
+	/* Respect the page boundaries */
+	len = (len < (PAGE_SIZE - pg_offset)) ? len : (PAGE_SIZE - pg_offset);
+	if (len > 0)
+		*bufpp = page_address(data_pages->pages[pg_idx]) + pg_offset;
+	return len;
+}
+EXPORT_SYMBOL_GPL(tmc_sg_table_get_data);
+
+#ifdef ETR_SG_DEBUG
+/* Map a dma address to virtual address */
+static unsigned long
+tmc_sg_daddr_to_vaddr(struct tmc_sg_table *sg_table,
+		      dma_addr_t addr, bool table)
+{
+	long offset;
+	unsigned long base;
+	struct tmc_pages *tmc_pages;
+
+	if (table) {
+		tmc_pages = &sg_table->table_pages;
+		base = (unsigned long)sg_table->table_vaddr;
+	} else {
+		tmc_pages = &sg_table->data_pages;
+		base = (unsigned long)sg_table->data_vaddr;
+	}
+
+	offset = tmc_pages_get_offset(tmc_pages, addr);
+	if (offset < 0)
+		return 0;
+	return base + offset;
+}
+
+/* Dump the given sg_table */
+static void tmc_etr_sg_table_dump(struct etr_sg_table *etr_table)
+{
+	sgte_t *ptr;
+	int i = 0;
+	dma_addr_t addr;
+	struct tmc_sg_table *sg_table = etr_table->sg_table;
+
+	ptr = (sgte_t *)tmc_sg_daddr_to_vaddr(sg_table,
+					      etr_table->hwaddr, true);
+	while (ptr) {
+		addr = ETR_SG_ADDR(*ptr);
+		switch (ETR_SG_ET(*ptr)) {
+		case ETR_SG_ET_NORMAL:
+			dev_dbg(sg_table->dev,
+				"%05d: %p\t:[N] 0x%llx\n", i, ptr, addr);
+			ptr++;
+			break;
+		case ETR_SG_ET_LINK:
+			dev_dbg(sg_table->dev,
+				"%05d: *** %p\t:{L} 0x%llx ***\n",
+				 i, ptr, addr);
+			ptr = (sgte_t *)tmc_sg_daddr_to_vaddr(sg_table,
+							      addr, true);
+			break;
+		case ETR_SG_ET_LAST:
+			dev_dbg(sg_table->dev,
+				"%05d: ### %p\t:[L] 0x%llx ###\n",
+				 i, ptr, addr);
+			return;
+		default:
+			dev_dbg(sg_table->dev,
+				"%05d: xxx %p\t:[INVALID] 0x%llx xxx\n",
+				 i, ptr, addr);
+			return;
+		}
+		i++;
+	}
+	dev_dbg(sg_table->dev, "******* End of Table *****\n");
+}
+#else
+static inline void tmc_etr_sg_table_dump(struct etr_sg_table *etr_table) {}
+#endif
+
+/*
+ * Populate the SG Table page table entries from table/data
+ * pages allocated. Each Data page has ETR_SG_PAGES_PER_SYSPAGE SG pages.
+ * So does a Table page. So we keep track of indices of the tables
+ * in each system page and move the pointers accordingly.
+ */
+#define INC_IDX_ROUND(idx, size) ((idx) = ((idx) + 1) % (size))
+static void tmc_etr_sg_table_populate(struct etr_sg_table *etr_table)
+{
+	dma_addr_t paddr;
+	int i, type, nr_entries;
+	int tpidx = 0; /* index to the current system table_page */
+	int sgtidx = 0;	/* index to the sg_table within the current syspage */
+	int sgtentry = 0; /* the entry within the sg_table */
+	int dpidx = 0; /* index to the current system data_page */
+	int spidx = 0; /* index to the SG page within the current data page */
+	sgte_t *ptr; /* pointer to the table entry to fill */
+	struct tmc_sg_table *sg_table = etr_table->sg_table;
+	dma_addr_t *table_daddrs = sg_table->table_pages.daddrs;
+	dma_addr_t *data_daddrs = sg_table->data_pages.daddrs;
+
+	nr_entries = tmc_etr_sg_table_entries(sg_table->data_pages.nr_pages);
+	/*
+	 * Use the contiguous virtual address of the table to update entries.
+	 */
+	ptr = sg_table->table_vaddr;
+	/*
+	 * Fill all the entries, except the last entry to avoid special
+	 * checks within the loop.
+	 */
+	for (i = 0; i < nr_entries - 1; i++) {
+		if (sgtentry == ETR_SG_PTRS_PER_PAGE - 1) {
+			/*
+			 * Last entry in a sg_table page is a link address to
+			 * the next table page. If this sg_table is the last
+			 * one in the system page, it links to the first
+			 * sg_table in the next system page. Otherwise, it
+			 * links to the next sg_table page within the system
+			 * page.
+			 */
+			if (sgtidx == ETR_SG_PAGES_PER_SYSPAGE - 1) {
+				paddr = table_daddrs[tpidx + 1];
+			} else {
+				paddr = table_daddrs[tpidx] +
+					(ETR_SG_PAGE_SIZE * (sgtidx + 1));
+			}
+			type = ETR_SG_ET_LINK;
+		} else {
+			/*
+			 * Update the indices to the data_pages to point to the
+			 * next sg_page in the data buffer.
+			 */
+			type = ETR_SG_ET_NORMAL;
+			paddr = data_daddrs[dpidx] + spidx * ETR_SG_PAGE_SIZE;
+			if (!INC_IDX_ROUND(spidx, ETR_SG_PAGES_PER_SYSPAGE))
+				dpidx++;
+		}
+		*ptr++ = ETR_SG_ENTRY(paddr, type);
+		/*
+		 * Move to the next table pointer, moving the table page index
+		 * if necessary
+		 */
+		if (!INC_IDX_ROUND(sgtentry, ETR_SG_PTRS_PER_PAGE)) {
+			if (!INC_IDX_ROUND(sgtidx, ETR_SG_PAGES_PER_SYSPAGE))
+				tpidx++;
+		}
+	}
+
+	/* Set up the last entry, which is always a data pointer */
+	paddr = data_daddrs[dpidx] + spidx * ETR_SG_PAGE_SIZE;
+	*ptr++ = ETR_SG_ENTRY(paddr, ETR_SG_ET_LAST);
+}
+
+/*
+ * tmc_init_etr_sg_table: Allocate a TMC ETR SG table, data buffer of @size and
+ * populate the table.
+ *
+ * @dev		- Device pointer for the TMC
+ * @node	- NUMA node where the memory should be allocated
+ * @size	- Total size of the data buffer
+ * @pages	- Optional list of page virtual address
+ */
+static struct etr_sg_table *
+tmc_init_etr_sg_table(struct device *dev, int node,
+		      unsigned long size, void **pages)
+{
+	int nr_entries, nr_tpages;
+	int nr_dpages = size >> PAGE_SHIFT;
+	struct tmc_sg_table *sg_table;
+	struct etr_sg_table *etr_table;
+
+	etr_table = kzalloc(sizeof(*etr_table), GFP_KERNEL);
+	if (!etr_table)
+		return ERR_PTR(-ENOMEM);
+	nr_entries = tmc_etr_sg_table_entries(nr_dpages);
+	nr_tpages = DIV_ROUND_UP(nr_entries, ETR_SG_PTRS_PER_SYSPAGE);
+
+	sg_table = tmc_alloc_sg_table(dev, node, nr_tpages, nr_dpages, pages);
+	if (IS_ERR(sg_table)) {
+		kfree(etr_table);
+		return ERR_CAST(sg_table);
+	}
+
+	etr_table->sg_table = sg_table;
+	/* TMC should use table base address for DBA */
+	etr_table->hwaddr = sg_table->table_daddr;
+	tmc_etr_sg_table_populate(etr_table);
+	/* Sync the table pages for the HW */
+	tmc_sg_table_sync_table(sg_table);
+	tmc_etr_sg_table_dump(etr_table);
+
+	return etr_table;
+}
+
+/*
+ * tmc_etr_alloc_flat_buf: Allocate a contiguous DMA buffer.
+ */
+static int tmc_etr_alloc_flat_buf(struct tmc_drvdata *drvdata,
+				  struct etr_buf *etr_buf, int node,
+				  void **pages)
+{
+	struct etr_flat_buf *flat_buf;
+	struct device *real_dev = drvdata->csdev->dev.parent;
+
+	/* We cannot reuse existing pages for flat buf */
+	if (pages)
+		return -EINVAL;
+
+	flat_buf = kzalloc(sizeof(*flat_buf), GFP_KERNEL);
+	if (!flat_buf)
+		return -ENOMEM;
+
+	flat_buf->vaddr = dma_alloc_noncoherent(real_dev, etr_buf->size,
+						&flat_buf->daddr,
+						DMA_FROM_DEVICE,
+						GFP_KERNEL | __GFP_NOWARN);
+	if (!flat_buf->vaddr) {
+		kfree(flat_buf);
+		return -ENOMEM;
+	}
+
+	flat_buf->size = etr_buf->size;
+	flat_buf->dev = &drvdata->csdev->dev;
+	etr_buf->hwaddr = flat_buf->daddr;
+	etr_buf->mode = ETR_MODE_FLAT;
+	etr_buf->private = flat_buf;
+	return 0;
+}
+
+static void tmc_etr_free_flat_buf(struct etr_buf *etr_buf)
+{
+	struct etr_flat_buf *flat_buf = etr_buf->private;
+
+	if (flat_buf && flat_buf->daddr) {
+		struct device *real_dev = flat_buf->dev->parent;
+
+		dma_free_noncoherent(real_dev, etr_buf->size,
+				     flat_buf->vaddr, flat_buf->daddr,
+				     DMA_FROM_DEVICE);
+	}
+	kfree(flat_buf);
+}
+
+static void tmc_etr_sync_flat_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp)
+{
+	struct etr_flat_buf *flat_buf = etr_buf->private;
+	struct device *real_dev = flat_buf->dev->parent;
+
+	/*
+	 * Adjust the buffer to point to the beginning of the trace data
+	 * and update the available trace data.
+	 */
+	etr_buf->offset = rrp - etr_buf->hwaddr;
+	if (etr_buf->full)
+		etr_buf->len = etr_buf->size;
+	else
+		etr_buf->len = rwp - rrp;
+
+	/*
+	 * The driver always starts tracing at the beginning of the buffer,
+	 * the only reason why we would get a wrap around is when the buffer
+	 * is full.  Sync the entire buffer in one go for this case.
+	 */
+	if (etr_buf->offset + etr_buf->len > etr_buf->size)
+		dma_sync_single_for_cpu(real_dev, flat_buf->daddr,
+					etr_buf->size, DMA_FROM_DEVICE);
+	else
+		dma_sync_single_for_cpu(real_dev,
+					flat_buf->daddr + etr_buf->offset,
+					etr_buf->len, DMA_FROM_DEVICE);
+}
+
+static ssize_t tmc_etr_get_data_flat_buf(struct etr_buf *etr_buf,
+					 u64 offset, size_t len, char **bufpp)
+{
+	struct etr_flat_buf *flat_buf = etr_buf->private;
+
+	*bufpp = (char *)flat_buf->vaddr + offset;
+	/*
+	 * tmc_etr_buf_get_data already adjusts the length to handle
+	 * buffer wrapping around.
+	 */
+	return len;
+}
+
+static const struct etr_buf_operations etr_flat_buf_ops = {
+	.alloc = tmc_etr_alloc_flat_buf,
+	.free = tmc_etr_free_flat_buf,
+	.sync = tmc_etr_sync_flat_buf,
+	.get_data = tmc_etr_get_data_flat_buf,
+};
+
+/*
+ * tmc_etr_alloc_sg_buf: Allocate an SG buf @etr_buf. Setup the parameters
+ * appropriately.
+ */
+static int tmc_etr_alloc_sg_buf(struct tmc_drvdata *drvdata,
+				struct etr_buf *etr_buf, int node,
+				void **pages)
+{
+	struct etr_sg_table *etr_table;
+	struct device *dev = &drvdata->csdev->dev;
+
+	etr_table = tmc_init_etr_sg_table(dev, node,
+					  etr_buf->size, pages);
+	if (IS_ERR(etr_table))
+		return -ENOMEM;
+	etr_buf->hwaddr = etr_table->hwaddr;
+	etr_buf->mode = ETR_MODE_ETR_SG;
+	etr_buf->private = etr_table;
+	return 0;
+}
+
+static void tmc_etr_free_sg_buf(struct etr_buf *etr_buf)
+{
+	struct etr_sg_table *etr_table = etr_buf->private;
+
+	if (etr_table) {
+		tmc_free_sg_table(etr_table->sg_table);
+		kfree(etr_table);
+	}
+}
+
+static ssize_t tmc_etr_get_data_sg_buf(struct etr_buf *etr_buf, u64 offset,
+				       size_t len, char **bufpp)
+{
+	struct etr_sg_table *etr_table = etr_buf->private;
+
+	return tmc_sg_table_get_data(etr_table->sg_table, offset, len, bufpp);
+}
+
+static void tmc_etr_sync_sg_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp)
+{
+	long r_offset, w_offset;
+	struct etr_sg_table *etr_table = etr_buf->private;
+	struct tmc_sg_table *table = etr_table->sg_table;
+
+	/* Convert hw address to offset in the buffer */
+	r_offset = tmc_sg_get_data_page_offset(table, rrp);
+	if (r_offset < 0) {
+		dev_warn(table->dev,
+			 "Unable to map RRP %llx to offset\n", rrp);
+		etr_buf->len = 0;
+		return;
+	}
+
+	w_offset = tmc_sg_get_data_page_offset(table, rwp);
+	if (w_offset < 0) {
+		dev_warn(table->dev,
+			 "Unable to map RWP %llx to offset\n", rwp);
+		etr_buf->len = 0;
+		return;
+	}
+
+	etr_buf->offset = r_offset;
+	if (etr_buf->full)
+		etr_buf->len = etr_buf->size;
+	else
+		etr_buf->len = ((w_offset < r_offset) ? etr_buf->size : 0) +
+				w_offset - r_offset;
+	tmc_sg_table_sync_data_range(table, r_offset, etr_buf->len);
+}
+
+static const struct etr_buf_operations etr_sg_buf_ops = {
+	.alloc = tmc_etr_alloc_sg_buf,
+	.free = tmc_etr_free_sg_buf,
+	.sync = tmc_etr_sync_sg_buf,
+	.get_data = tmc_etr_get_data_sg_buf,
+};
+
+/*
+ * TMC ETR could be connected to a CATU device, which can provide address
+ * translation service. This is represented by the Output port of the TMC
+ * (ETR) connected to the input port of the CATU.
+ *
+ * Returns	: coresight_device ptr for the CATU device if a CATU is found.
+ *		: NULL otherwise.
+ */
+struct coresight_device *
+tmc_etr_get_catu_device(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *etr = drvdata->csdev;
+	union coresight_dev_subtype catu_subtype = {
+		.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU
+	};
+
+	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
+		return NULL;
+
+	return coresight_find_output_type(etr->pdata, CORESIGHT_DEV_TYPE_HELPER,
+					  catu_subtype);
+}
+EXPORT_SYMBOL_GPL(tmc_etr_get_catu_device);
+
+static const struct etr_buf_operations *etr_buf_ops[] = {
+	[ETR_MODE_FLAT] = &etr_flat_buf_ops,
+	[ETR_MODE_ETR_SG] = &etr_sg_buf_ops,
+	[ETR_MODE_CATU] = NULL,
+};
+
+void tmc_etr_set_catu_ops(const struct etr_buf_operations *catu)
+{
+	etr_buf_ops[ETR_MODE_CATU] = catu;
+}
+EXPORT_SYMBOL_GPL(tmc_etr_set_catu_ops);
+
+void tmc_etr_remove_catu_ops(void)
+{
+	etr_buf_ops[ETR_MODE_CATU] = NULL;
+}
+EXPORT_SYMBOL_GPL(tmc_etr_remove_catu_ops);
+
+static inline int tmc_etr_mode_alloc_buf(int mode,
+					 struct tmc_drvdata *drvdata,
+					 struct etr_buf *etr_buf, int node,
+					 void **pages)
+{
+	int rc = -EINVAL;
+
+	switch (mode) {
+	case ETR_MODE_FLAT:
+	case ETR_MODE_ETR_SG:
+	case ETR_MODE_CATU:
+		if (etr_buf_ops[mode] && etr_buf_ops[mode]->alloc)
+			rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf,
+						      node, pages);
+		if (!rc)
+			etr_buf->ops = etr_buf_ops[mode];
+		return rc;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * tmc_alloc_etr_buf: Allocate a buffer use by ETR.
+ * @drvdata	: ETR device details.
+ * @size	: size of the requested buffer.
+ * @flags	: Required properties for the buffer.
+ * @node	: Node for memory allocations.
+ * @pages	: An optional list of pages.
+ */
+static struct etr_buf *tmc_alloc_etr_buf(struct tmc_drvdata *drvdata,
+					 ssize_t size, int flags,
+					 int node, void **pages)
+{
+	int rc = -ENOMEM;
+	bool has_etr_sg, has_iommu;
+	bool has_sg, has_catu;
+	struct etr_buf *etr_buf;
+	struct device *dev = &drvdata->csdev->dev;
+
+	has_etr_sg = tmc_etr_has_cap(drvdata, TMC_ETR_SG);
+	has_iommu = iommu_get_domain_for_dev(dev->parent);
+	has_catu = !!tmc_etr_get_catu_device(drvdata);
+
+	has_sg = has_catu || has_etr_sg;
+
+	etr_buf = kzalloc(sizeof(*etr_buf), GFP_KERNEL);
+	if (!etr_buf)
+		return ERR_PTR(-ENOMEM);
+
+	etr_buf->size = size;
+
+	/*
+	 * If we have to use an existing list of pages, we cannot reliably
+	 * use a contiguous DMA memory (even if we have an IOMMU). Otherwise,
+	 * we use the contiguous DMA memory if at least one of the following
+	 * conditions is true:
+	 *  a) The ETR cannot use Scatter-Gather.
+	 *  b) we have a backing IOMMU
+	 *  c) The requested memory size is smaller (< 1M).
+	 *
+	 * Fallback to available mechanisms.
+	 *
+	 */
+	if (!pages &&
+	    (!has_sg || has_iommu || size < SZ_1M))
+		rc = tmc_etr_mode_alloc_buf(ETR_MODE_FLAT, drvdata,
+					    etr_buf, node, pages);
+	if (rc && has_etr_sg)
+		rc = tmc_etr_mode_alloc_buf(ETR_MODE_ETR_SG, drvdata,
+					    etr_buf, node, pages);
+	if (rc && has_catu)
+		rc = tmc_etr_mode_alloc_buf(ETR_MODE_CATU, drvdata,
+					    etr_buf, node, pages);
+	if (rc) {
+		kfree(etr_buf);
+		return ERR_PTR(rc);
+	}
+
+	refcount_set(&etr_buf->refcount, 1);
+	dev_dbg(dev, "allocated buffer of size %ldKB in mode %d\n",
+		(unsigned long)size >> 10, etr_buf->mode);
+	return etr_buf;
+}
+
+static void tmc_free_etr_buf(struct etr_buf *etr_buf)
+{
+	WARN_ON(!etr_buf->ops || !etr_buf->ops->free);
+	etr_buf->ops->free(etr_buf);
+	kfree(etr_buf);
+}
+
+/*
+ * tmc_etr_buf_get_data: Get the pointer the trace data at @offset
+ * with a maximum of @len bytes.
+ * Returns: The size of the linear data available @pos, with *bufpp
+ * updated to point to the buffer.
+ */
+static ssize_t tmc_etr_buf_get_data(struct etr_buf *etr_buf,
+				    u64 offset, size_t len, char **bufpp)
+{
+	/* Adjust the length to limit this transaction to end of buffer */
+	len = (len < (etr_buf->size - offset)) ? len : etr_buf->size - offset;
+
+	return etr_buf->ops->get_data(etr_buf, (u64)offset, len, bufpp);
+}
+
+static inline s64
+tmc_etr_buf_insert_barrier_packet(struct etr_buf *etr_buf, u64 offset)
+{
+	ssize_t len;
+	char *bufp;
+
+	len = tmc_etr_buf_get_data(etr_buf, offset,
+				   CORESIGHT_BARRIER_PKT_SIZE, &bufp);
+	if (WARN_ON(len < 0 || len < CORESIGHT_BARRIER_PKT_SIZE))
+		return -EINVAL;
+	coresight_insert_barrier_packet(bufp);
+	return offset + CORESIGHT_BARRIER_PKT_SIZE;
+}
+
+/*
+ * tmc_sync_etr_buf: Sync the trace buffer availability with drvdata.
+ * Makes sure the trace data is synced to the memory for consumption.
+ * @etr_buf->offset will hold the offset to the beginning of the trace data
+ * within the buffer, with @etr_buf->len bytes to consume.
+ */
+static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata)
+{
+	struct etr_buf *etr_buf = drvdata->etr_buf;
+	u64 rrp, rwp;
+	u32 status;
+
+	rrp = tmc_read_rrp(drvdata);
+	rwp = tmc_read_rwp(drvdata);
+	status = readl_relaxed(drvdata->base + TMC_STS);
+
+	/*
+	 * If there were memory errors in the session, truncate the
+	 * buffer.
+	 */
+	if (WARN_ON_ONCE(status & TMC_STS_MEMERR)) {
+		dev_dbg(&drvdata->csdev->dev,
+			"tmc memory error detected, truncating buffer\n");
+		etr_buf->len = 0;
+		etr_buf->full = false;
+		return;
+	}
+
+	etr_buf->full = !!(status & TMC_STS_FULL);
+
+	WARN_ON(!etr_buf->ops || !etr_buf->ops->sync);
+
+	etr_buf->ops->sync(etr_buf, rrp, rwp);
+}
+
+static int __tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
+{
+	u32 axictl, sts;
+	struct etr_buf *etr_buf = drvdata->etr_buf;
+	int rc = 0;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Wait for TMCSReady bit to be set */
+	rc = tmc_wait_for_tmcready(drvdata);
+	if (rc) {
+		dev_err(&drvdata->csdev->dev,
+			"Failed to enable : TMC not ready\n");
+		CS_LOCK(drvdata->base);
+		return rc;
+	}
+
+	writel_relaxed(etr_buf->size / 4, drvdata->base + TMC_RSZ);
+	writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE);
+
+	axictl = readl_relaxed(drvdata->base + TMC_AXICTL);
+	axictl &= ~TMC_AXICTL_CLEAR_MASK;
+	axictl |= TMC_AXICTL_PROT_CTL_B1;
+	axictl |= TMC_AXICTL_WR_BURST(drvdata->max_burst_size);
+	axictl |= TMC_AXICTL_AXCACHE_OS;
+
+	if (tmc_etr_has_cap(drvdata, TMC_ETR_AXI_ARCACHE)) {
+		axictl &= ~TMC_AXICTL_ARCACHE_MASK;
+		axictl |= TMC_AXICTL_ARCACHE_OS;
+	}
+
+	if (etr_buf->mode == ETR_MODE_ETR_SG)
+		axictl |= TMC_AXICTL_SCT_GAT_MODE;
+
+	writel_relaxed(axictl, drvdata->base + TMC_AXICTL);
+	tmc_write_dba(drvdata, etr_buf->hwaddr);
+	/*
+	 * If the TMC pointers must be programmed before the session,
+	 * we have to set it properly (i.e, RRP/RWP to base address and
+	 * STS to "not full").
+	 */
+	if (tmc_etr_has_cap(drvdata, TMC_ETR_SAVE_RESTORE)) {
+		tmc_write_rrp(drvdata, etr_buf->hwaddr);
+		tmc_write_rwp(drvdata, etr_buf->hwaddr);
+		sts = readl_relaxed(drvdata->base + TMC_STS) & ~TMC_STS_FULL;
+		writel_relaxed(sts, drvdata->base + TMC_STS);
+	}
+
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI |
+		       TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT |
+		       TMC_FFCR_TRIGON_TRIGIN,
+		       drvdata->base + TMC_FFCR);
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+	return rc;
+}
+
+static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata,
+			     struct etr_buf *etr_buf)
+{
+	int rc;
+
+	/* Callers should provide an appropriate buffer for use */
+	if (WARN_ON(!etr_buf))
+		return -EINVAL;
+
+	if ((etr_buf->mode == ETR_MODE_ETR_SG) &&
+	    WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG)))
+		return -EINVAL;
+
+	if (WARN_ON(drvdata->etr_buf))
+		return -EBUSY;
+
+	rc = coresight_claim_device(drvdata->csdev);
+	if (!rc) {
+		drvdata->etr_buf = etr_buf;
+		rc = __tmc_etr_enable_hw(drvdata);
+		if (rc) {
+			drvdata->etr_buf = NULL;
+			coresight_disclaim_device(drvdata->csdev);
+		}
+	}
+
+	return rc;
+}
+
+/*
+ * Return the available trace data in the buffer (starts at etr_buf->offset,
+ * limited by etr_buf->len) from @pos, with a maximum limit of @len,
+ * also updating the @bufpp on where to find it. Since the trace data
+ * starts at anywhere in the buffer, depending on the RRP, we adjust the
+ * @len returned to handle buffer wrapping around.
+ *
+ * We are protected here by drvdata->reading != 0, which ensures the
+ * sysfs_buf stays alive.
+ */
+ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
+				loff_t pos, size_t len, char **bufpp)
+{
+	s64 offset;
+	ssize_t actual = len;
+	struct etr_buf *etr_buf = drvdata->sysfs_buf;
+
+	if (pos + actual > etr_buf->len)
+		actual = etr_buf->len - pos;
+	if (actual <= 0)
+		return actual;
+
+	/* Compute the offset from which we read the data */
+	offset = etr_buf->offset + pos;
+	if (offset >= etr_buf->size)
+		offset -= etr_buf->size;
+	return tmc_etr_buf_get_data(etr_buf, offset, actual, bufpp);
+}
+
+static struct etr_buf *
+tmc_etr_setup_sysfs_buf(struct tmc_drvdata *drvdata)
+{
+	return tmc_alloc_etr_buf(drvdata, drvdata->size,
+				 0, cpu_to_node(0), NULL);
+}
+
+static void
+tmc_etr_free_sysfs_buf(struct etr_buf *buf)
+{
+	if (buf)
+		tmc_free_etr_buf(buf);
+}
+
+static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata)
+{
+	struct etr_buf *etr_buf = drvdata->etr_buf;
+
+	if (WARN_ON(drvdata->sysfs_buf != etr_buf)) {
+		tmc_etr_free_sysfs_buf(drvdata->sysfs_buf);
+		drvdata->sysfs_buf = NULL;
+	} else {
+		tmc_sync_etr_buf(drvdata);
+		/*
+		 * Insert barrier packets at the beginning, if there was
+		 * an overflow.
+		 */
+		if (etr_buf->full)
+			tmc_etr_buf_insert_barrier_packet(etr_buf,
+							  etr_buf->offset);
+	}
+}
+
+static void __tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	/*
+	 * When operating in sysFS mode the content of the buffer needs to be
+	 * read before the TMC is disabled.
+	 */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		tmc_etr_sync_sysfs_buf(drvdata);
+
+	tmc_disable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+
+}
+
+void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+{
+	__tmc_etr_disable_hw(drvdata);
+	coresight_disclaim_device(drvdata->csdev);
+	/* Reset the ETR buf used by hardware */
+	drvdata->etr_buf = NULL;
+}
+
+static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etr_buf *sysfs_buf = NULL, *new_buf = NULL, *free_buf = NULL;
+
+	/*
+	 * If we are enabling the ETR from disabled state, we need to make
+	 * sure we have a buffer with the right size. The etr_buf is not reset
+	 * immediately after we stop the tracing in SYSFS mode as we wait for
+	 * the user to collect the data. We may be able to reuse the existing
+	 * buffer, provided the size matches. Any allocation has to be done
+	 * with the lock released.
+	 */
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	sysfs_buf = READ_ONCE(drvdata->sysfs_buf);
+	if (!sysfs_buf || (sysfs_buf->size != drvdata->size)) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+		/* Allocate memory with the locks released */
+		free_buf = new_buf = tmc_etr_setup_sysfs_buf(drvdata);
+		if (IS_ERR(new_buf))
+			return new_buf;
+
+		/* Let's try again */
+		spin_lock_irqsave(&drvdata->spinlock, flags);
+	}
+
+	if (drvdata->reading || drvdata->mode == CS_MODE_PERF) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * If we don't have a buffer or it doesn't match the requested size,
+	 * use the buffer allocated above. Otherwise reuse the existing buffer.
+	 */
+	sysfs_buf = READ_ONCE(drvdata->sysfs_buf);
+	if (!sysfs_buf || (new_buf && sysfs_buf->size != new_buf->size)) {
+		free_buf = sysfs_buf;
+		drvdata->sysfs_buf = new_buf;
+	}
+
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	/* Free memory outside the spinlock if need be */
+	if (free_buf)
+		tmc_etr_free_sysfs_buf(free_buf);
+	return ret ? ERR_PTR(ret) : drvdata->sysfs_buf;
+}
+
+static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etr_buf *sysfs_buf = tmc_etr_get_sysfs_buffer(csdev);
+
+	if (IS_ERR(sysfs_buf))
+		return PTR_ERR(sysfs_buf);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/*
+	 * In sysFS mode we can have multiple writers per sink.  Since this
+	 * sink is already enabled no memory is needed and the HW need not be
+	 * touched, even if the buffer size has changed.
+	 */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		atomic_inc(&csdev->refcnt);
+		goto out;
+	}
+
+	ret = tmc_etr_enable_hw(drvdata, sysfs_buf);
+	if (!ret) {
+		drvdata->mode = CS_MODE_SYSFS;
+		atomic_inc(&csdev->refcnt);
+	}
+
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	if (!ret)
+		dev_dbg(&csdev->dev, "TMC-ETR enabled\n");
+
+	return ret;
+}
+
+struct etr_buf *tmc_etr_get_buffer(struct coresight_device *csdev,
+				   enum cs_mode mode, void *data)
+{
+	struct perf_output_handle *handle = data;
+	struct etr_perf_buffer *etr_perf;
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		return tmc_etr_get_sysfs_buffer(csdev);
+	case CS_MODE_PERF:
+		etr_perf = etm_perf_sink_config(handle);
+		if (WARN_ON(!etr_perf || !etr_perf->etr_buf))
+			return ERR_PTR(-EINVAL);
+		return etr_perf->etr_buf;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+}
+EXPORT_SYMBOL_GPL(tmc_etr_get_buffer);
+
+/*
+ * alloc_etr_buf: Allocate ETR buffer for use by perf.
+ * The size of the hardware buffer is dependent on the size configured
+ * via sysfs and the perf ring buffer size. We prefer to allocate the
+ * largest possible size, scaling down the size by half until it
+ * reaches a minimum limit (1M), beyond which we give up.
+ */
+static struct etr_buf *
+alloc_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event,
+	      int nr_pages, void **pages, bool snapshot)
+{
+	int node;
+	struct etr_buf *etr_buf;
+	unsigned long size;
+
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+	/*
+	 * Try to match the perf ring buffer size if it is larger
+	 * than the size requested via sysfs.
+	 */
+	if ((nr_pages << PAGE_SHIFT) > drvdata->size) {
+		etr_buf = tmc_alloc_etr_buf(drvdata, ((ssize_t)nr_pages << PAGE_SHIFT),
+					    0, node, NULL);
+		if (!IS_ERR(etr_buf))
+			goto done;
+	}
+
+	/*
+	 * Else switch to configured size for this ETR
+	 * and scale down until we hit the minimum limit.
+	 */
+	size = drvdata->size;
+	do {
+		etr_buf = tmc_alloc_etr_buf(drvdata, size, 0, node, NULL);
+		if (!IS_ERR(etr_buf))
+			goto done;
+		size /= 2;
+	} while (size >= TMC_ETR_PERF_MIN_BUF_SIZE);
+
+	return ERR_PTR(-ENOMEM);
+
+done:
+	return etr_buf;
+}
+
+static struct etr_buf *
+get_perf_etr_buf_cpu_wide(struct tmc_drvdata *drvdata,
+			  struct perf_event *event, int nr_pages,
+			  void **pages, bool snapshot)
+{
+	int ret;
+	pid_t pid = task_pid_nr(event->owner);
+	struct etr_buf *etr_buf;
+
+retry:
+	/*
+	 * An etr_perf_buffer is associated with an event and holds a reference
+	 * to the AUX ring buffer that was created for that event.  In CPU-wide
+	 * N:1 mode multiple events (one per CPU), each with its own AUX ring
+	 * buffer, share a sink.  As such an etr_perf_buffer is created for each
+	 * event but a single etr_buf associated with the ETR is shared between
+	 * them.  The last event in a trace session will copy the content of the
+	 * etr_buf to its AUX ring buffer.  Ring buffer associated to other
+	 * events are simply not used an freed as events are destoyed.  We still
+	 * need to allocate a ring buffer for each event since we don't know
+	 * which event will be last.
+	 */
+
+	/*
+	 * The first thing to do here is check if an etr_buf has already been
+	 * allocated for this session.  If so it is shared with this event,
+	 * otherwise it is created.
+	 */
+	mutex_lock(&drvdata->idr_mutex);
+	etr_buf = idr_find(&drvdata->idr, pid);
+	if (etr_buf) {
+		refcount_inc(&etr_buf->refcount);
+		mutex_unlock(&drvdata->idr_mutex);
+		return etr_buf;
+	}
+
+	/* If we made it here no buffer has been allocated, do so now. */
+	mutex_unlock(&drvdata->idr_mutex);
+
+	etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot);
+	if (IS_ERR(etr_buf))
+		return etr_buf;
+
+	/* Now that we have a buffer, add it to the IDR. */
+	mutex_lock(&drvdata->idr_mutex);
+	ret = idr_alloc(&drvdata->idr, etr_buf, pid, pid + 1, GFP_KERNEL);
+	mutex_unlock(&drvdata->idr_mutex);
+
+	/* Another event with this session ID has allocated this buffer. */
+	if (ret == -ENOSPC) {
+		tmc_free_etr_buf(etr_buf);
+		goto retry;
+	}
+
+	/* The IDR can't allocate room for a new session, abandon ship. */
+	if (ret == -ENOMEM) {
+		tmc_free_etr_buf(etr_buf);
+		return ERR_PTR(ret);
+	}
+
+
+	return etr_buf;
+}
+
+static struct etr_buf *
+get_perf_etr_buf_per_thread(struct tmc_drvdata *drvdata,
+			    struct perf_event *event, int nr_pages,
+			    void **pages, bool snapshot)
+{
+	/*
+	 * In per-thread mode the etr_buf isn't shared, so just go ahead
+	 * with memory allocation.
+	 */
+	return alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot);
+}
+
+static struct etr_buf *
+get_perf_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event,
+		 int nr_pages, void **pages, bool snapshot)
+{
+	if (event->cpu == -1)
+		return get_perf_etr_buf_per_thread(drvdata, event, nr_pages,
+						   pages, snapshot);
+
+	return get_perf_etr_buf_cpu_wide(drvdata, event, nr_pages,
+					 pages, snapshot);
+}
+
+static struct etr_perf_buffer *
+tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event,
+		       int nr_pages, void **pages, bool snapshot)
+{
+	int node;
+	struct etr_buf *etr_buf;
+	struct etr_perf_buffer *etr_perf;
+
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+
+	etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node);
+	if (!etr_perf)
+		return ERR_PTR(-ENOMEM);
+
+	etr_buf = get_perf_etr_buf(drvdata, event, nr_pages, pages, snapshot);
+	if (!IS_ERR(etr_buf))
+		goto done;
+
+	kfree(etr_perf);
+	return ERR_PTR(-ENOMEM);
+
+done:
+	/*
+	 * Keep a reference to the ETR this buffer has been allocated for
+	 * in order to have access to the IDR in tmc_free_etr_buffer().
+	 */
+	etr_perf->drvdata = drvdata;
+	etr_perf->etr_buf = etr_buf;
+
+	return etr_perf;
+}
+
+
+static void *tmc_alloc_etr_buffer(struct coresight_device *csdev,
+				  struct perf_event *event, void **pages,
+				  int nr_pages, bool snapshot)
+{
+	struct etr_perf_buffer *etr_perf;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	etr_perf = tmc_etr_setup_perf_buf(drvdata, event,
+					  nr_pages, pages, snapshot);
+	if (IS_ERR(etr_perf)) {
+		dev_dbg(&csdev->dev, "Unable to allocate ETR buffer\n");
+		return NULL;
+	}
+
+	etr_perf->pid = task_pid_nr(event->owner);
+	etr_perf->snapshot = snapshot;
+	etr_perf->nr_pages = nr_pages;
+	etr_perf->pages = pages;
+
+	return etr_perf;
+}
+
+static void tmc_free_etr_buffer(void *config)
+{
+	struct etr_perf_buffer *etr_perf = config;
+	struct tmc_drvdata *drvdata = etr_perf->drvdata;
+	struct etr_buf *buf, *etr_buf = etr_perf->etr_buf;
+
+	if (!etr_buf)
+		goto free_etr_perf_buffer;
+
+	mutex_lock(&drvdata->idr_mutex);
+	/* If we are not the last one to use the buffer, don't touch it. */
+	if (!refcount_dec_and_test(&etr_buf->refcount)) {
+		mutex_unlock(&drvdata->idr_mutex);
+		goto free_etr_perf_buffer;
+	}
+
+	/* We are the last one, remove from the IDR and free the buffer. */
+	buf = idr_remove(&drvdata->idr, etr_perf->pid);
+	mutex_unlock(&drvdata->idr_mutex);
+
+	/*
+	 * Something went very wrong if the buffer associated with this ID
+	 * is not the same in the IDR.  Leak to avoid use after free.
+	 */
+	if (buf && WARN_ON(buf != etr_buf))
+		goto free_etr_perf_buffer;
+
+	tmc_free_etr_buf(etr_perf->etr_buf);
+
+free_etr_perf_buffer:
+	kfree(etr_perf);
+}
+
+/*
+ * tmc_etr_sync_perf_buffer: Copy the actual trace data from the hardware
+ * buffer to the perf ring buffer.
+ */
+static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf,
+				     unsigned long head,
+				     unsigned long src_offset,
+				     unsigned long to_copy)
+{
+	long bytes;
+	long pg_idx, pg_offset;
+	char **dst_pages, *src_buf;
+	struct etr_buf *etr_buf = etr_perf->etr_buf;
+
+	head = PERF_IDX2OFF(head, etr_perf);
+	pg_idx = head >> PAGE_SHIFT;
+	pg_offset = head & (PAGE_SIZE - 1);
+	dst_pages = (char **)etr_perf->pages;
+
+	while (to_copy > 0) {
+		/*
+		 * In one iteration, we can copy minimum of :
+		 *  1) what is available in the source buffer,
+		 *  2) what is available in the source buffer, before it
+		 *     wraps around.
+		 *  3) what is available in the destination page.
+		 * in one iteration.
+		 */
+		if (src_offset >= etr_buf->size)
+			src_offset -= etr_buf->size;
+		bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy,
+					     &src_buf);
+		if (WARN_ON_ONCE(bytes <= 0))
+			break;
+		bytes = min(bytes, (long)(PAGE_SIZE - pg_offset));
+
+		memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes);
+
+		to_copy -= bytes;
+
+		/* Move destination pointers */
+		pg_offset += bytes;
+		if (pg_offset == PAGE_SIZE) {
+			pg_offset = 0;
+			if (++pg_idx == etr_perf->nr_pages)
+				pg_idx = 0;
+		}
+
+		/* Move source pointers */
+		src_offset += bytes;
+	}
+}
+
+/*
+ * tmc_update_etr_buffer : Update the perf ring buffer with the
+ * available trace data. We use software double buffering at the moment.
+ *
+ * TODO: Add support for reusing the perf ring buffer.
+ */
+static unsigned long
+tmc_update_etr_buffer(struct coresight_device *csdev,
+		      struct perf_output_handle *handle,
+		      void *config)
+{
+	bool lost = false;
+	unsigned long flags, offset, size = 0;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct etr_perf_buffer *etr_perf = config;
+	struct etr_buf *etr_buf = etr_perf->etr_buf;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* Don't do anything if another tracer is using this sink */
+	if (atomic_read(&csdev->refcnt) != 1) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		goto out;
+	}
+
+	if (WARN_ON(drvdata->perf_buf != etr_buf)) {
+		lost = true;
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		goto out;
+	}
+
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	tmc_sync_etr_buf(drvdata);
+
+	CS_LOCK(drvdata->base);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	lost = etr_buf->full;
+	offset = etr_buf->offset;
+	size = etr_buf->len;
+
+	/*
+	 * The ETR buffer may be bigger than the space available in the
+	 * perf ring buffer (handle->size).  If so advance the offset so that we
+	 * get the latest trace data.  In snapshot mode none of that matters
+	 * since we are expected to clobber stale data in favour of the latest
+	 * traces.
+	 */
+	if (!etr_perf->snapshot && size > handle->size) {
+		u32 mask = tmc_get_memwidth_mask(drvdata);
+
+		/*
+		 * Make sure the new size is aligned in accordance with the
+		 * requirement explained in function tmc_get_memwidth_mask().
+		 */
+		size = handle->size & mask;
+		offset = etr_buf->offset + etr_buf->len - size;
+
+		if (offset >= etr_buf->size)
+			offset -= etr_buf->size;
+		lost = true;
+	}
+
+	/* Insert barrier packets at the beginning, if there was an overflow */
+	if (lost)
+		tmc_etr_buf_insert_barrier_packet(etr_buf, offset);
+	tmc_etr_sync_perf_buffer(etr_perf, handle->head, offset, size);
+
+	/*
+	 * In snapshot mode we simply increment the head by the number of byte
+	 * that were written.  User space will figure out how many bytes to get
+	 * from the AUX buffer based on the position of the head.
+	 */
+	if (etr_perf->snapshot)
+		handle->head += size;
+
+	/*
+	 * Ensure that the AUX trace data is visible before the aux_head
+	 * is updated via perf_aux_output_end(), as expected by the
+	 * perf ring buffer.
+	 */
+	smp_wmb();
+
+out:
+	/*
+	 * Don't set the TRUNCATED flag in snapshot mode because 1) the
+	 * captured buffer is expected to be truncated and 2) a full buffer
+	 * prevents the event from being re-enabled by the perf core,
+	 * resulting in stale data being send to user space.
+	 */
+	if (!etr_perf->snapshot && lost)
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	return size;
+}
+
+static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
+{
+	int rc = 0;
+	pid_t pid;
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
+	struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	 /* Don't use this sink if it is already claimed by sysFS */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		rc = -EBUSY;
+		goto unlock_out;
+	}
+
+	if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
+	/* Get a handle on the pid of the process to monitor */
+	pid = etr_perf->pid;
+
+	/* Do not proceed if this device is associated with another session */
+	if (drvdata->pid != -1 && drvdata->pid != pid) {
+		rc = -EBUSY;
+		goto unlock_out;
+	}
+
+	/*
+	 * No HW configuration is needed if the sink is already in
+	 * use for this session.
+	 */
+	if (drvdata->pid == pid) {
+		atomic_inc(&csdev->refcnt);
+		goto unlock_out;
+	}
+
+	rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf);
+	if (!rc) {
+		/* Associate with monitored process. */
+		drvdata->pid = pid;
+		drvdata->mode = CS_MODE_PERF;
+		drvdata->perf_buf = etr_perf->etr_buf;
+		atomic_inc(&csdev->refcnt);
+	}
+
+unlock_out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return rc;
+}
+
+static int tmc_enable_etr_sink(struct coresight_device *csdev,
+			       enum cs_mode mode, void *data)
+{
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		return tmc_enable_etr_sink_sysfs(csdev);
+	case CS_MODE_PERF:
+		return tmc_enable_etr_sink_perf(csdev, data);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int tmc_disable_etr_sink(struct coresight_device *csdev)
+{
+	unsigned long flags;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
+	}
+
+	if (atomic_dec_return(&csdev->refcnt)) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		return -EBUSY;
+	}
+
+	/* Complain if we (somehow) got out of sync */
+	WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED);
+	tmc_etr_disable_hw(drvdata);
+	/* Dissociate from monitored process. */
+	drvdata->pid = -1;
+	drvdata->mode = CS_MODE_DISABLED;
+	/* Reset perf specific data */
+	drvdata->perf_buf = NULL;
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_dbg(&csdev->dev, "TMC-ETR disabled\n");
+	return 0;
+}
+
+static const struct coresight_ops_sink tmc_etr_sink_ops = {
+	.enable		= tmc_enable_etr_sink,
+	.disable	= tmc_disable_etr_sink,
+	.alloc_buffer	= tmc_alloc_etr_buffer,
+	.update_buffer	= tmc_update_etr_buffer,
+	.free_buffer	= tmc_free_etr_buffer,
+};
+
+const struct coresight_ops tmc_etr_cs_ops = {
+	.sink_ops	= &tmc_etr_sink_ops,
+};
+
+int tmc_read_prepare_etr(struct tmc_drvdata *drvdata)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	/* config types are set a boot time and never change */
+	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR))
+		return -EINVAL;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * We can safely allow reads even if the ETR is operating in PERF mode,
+	 * since the sysfs session is captured in mode specific data.
+	 * If drvdata::sysfs_data is NULL the trace data has been read already.
+	 */
+	if (!drvdata->sysfs_buf) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Disable the TMC if we are trying to read from a running session. */
+	if (drvdata->mode == CS_MODE_SYSFS)
+		__tmc_etr_disable_hw(drvdata);
+
+	drvdata->reading = true;
+out:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	return ret;
+}
+
+int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
+{
+	unsigned long flags;
+	struct etr_buf *sysfs_buf = NULL;
+
+	/* config types are set a boot time and never change */
+	if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR))
+		return -EINVAL;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	/* RE-enable the TMC if need be */
+	if (drvdata->mode == CS_MODE_SYSFS) {
+		/*
+		 * The trace run will continue with the same allocated trace
+		 * buffer. Since the tracer is still enabled drvdata::buf can't
+		 * be NULL.
+		 */
+		__tmc_etr_enable_hw(drvdata);
+	} else {
+		/*
+		 * The ETR is not tracing and the buffer was just read.
+		 * As such prepare to free the trace buffer.
+		 */
+		sysfs_buf = drvdata->sysfs_buf;
+		drvdata->sysfs_buf = NULL;
+	}
+
+	drvdata->reading = false;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	/* Free allocated memory out side of the spinlock */
+	if (sysfs_buf)
+		tmc_etr_free_sysfs_buf(sysfs_buf);
+
+	return 0;
+}
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
new file mode 100644
index 0000000000..0ee48c5ba7
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef _CORESIGHT_TMC_H
+#define _CORESIGHT_TMC_H
+
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+
+#define TMC_RSZ			0x004
+#define TMC_STS			0x00c
+#define TMC_RRD			0x010
+#define TMC_RRP			0x014
+#define TMC_RWP			0x018
+#define TMC_TRG			0x01c
+#define TMC_CTL			0x020
+#define TMC_RWD			0x024
+#define TMC_MODE		0x028
+#define TMC_LBUFLEVEL		0x02c
+#define TMC_CBUFLEVEL		0x030
+#define TMC_BUFWM		0x034
+#define TMC_RRPHI		0x038
+#define TMC_RWPHI		0x03c
+#define TMC_AXICTL		0x110
+#define TMC_DBALO		0x118
+#define TMC_DBAHI		0x11c
+#define TMC_FFSR		0x300
+#define TMC_FFCR		0x304
+#define TMC_PSCR		0x308
+#define TMC_ITMISCOP0		0xee0
+#define TMC_ITTRFLIN		0xee8
+#define TMC_ITATBDATA0		0xeec
+#define TMC_ITATBCTR2		0xef0
+#define TMC_ITATBCTR1		0xef4
+#define TMC_ITATBCTR0		0xef8
+#define TMC_AUTHSTATUS		0xfb8
+
+/* register description */
+/* TMC_CTL - 0x020 */
+#define TMC_CTL_CAPT_EN		BIT(0)
+/* TMC_STS - 0x00C */
+#define TMC_STS_TMCREADY_BIT	2
+#define TMC_STS_FULL		BIT(0)
+#define TMC_STS_TRIGGERED	BIT(1)
+#define TMC_STS_MEMERR		BIT(5)
+/*
+ * TMC_AXICTL - 0x110
+ *
+ * TMC AXICTL format for SoC-400
+ *	Bits [0-1]	: ProtCtrlBit0-1
+ *	Bits [2-5]	: CacheCtrlBits 0-3 (AXCACHE)
+ *	Bit  6		: Reserved
+ *	Bit  7		: ScatterGatherMode
+ *	Bits [8-11]	: WrBurstLen
+ *	Bits [12-31]	: Reserved.
+ * TMC AXICTL format for SoC-600, as above except:
+ *	Bits [2-5]	: AXI WCACHE
+ *	Bits [16-19]	: AXI RCACHE
+ *	Bits [20-31]	: Reserved
+ */
+#define TMC_AXICTL_CLEAR_MASK 0xfbf
+#define TMC_AXICTL_ARCACHE_MASK (0xf << 16)
+
+#define TMC_AXICTL_PROT_CTL_B0	BIT(0)
+#define TMC_AXICTL_PROT_CTL_B1	BIT(1)
+#define TMC_AXICTL_SCT_GAT_MODE	BIT(7)
+#define TMC_AXICTL_WR_BURST(v)	(((v) & 0xf) << 8)
+#define TMC_AXICTL_WR_BURST_16	0xf
+/* Write-back Read and Write-allocate */
+#define TMC_AXICTL_AXCACHE_OS	(0xf << 2)
+#define TMC_AXICTL_ARCACHE_OS	(0xf << 16)
+
+/* TMC_FFCR - 0x304 */
+#define TMC_FFCR_FLUSHMAN_BIT	6
+#define TMC_FFCR_EN_FMT		BIT(0)
+#define TMC_FFCR_EN_TI		BIT(1)
+#define TMC_FFCR_FON_FLIN	BIT(4)
+#define TMC_FFCR_FON_TRIG_EVT	BIT(5)
+#define TMC_FFCR_TRIGON_TRIGIN	BIT(8)
+#define TMC_FFCR_STOP_ON_FLUSH	BIT(12)
+
+
+#define TMC_DEVID_NOSCAT	BIT(24)
+
+#define TMC_DEVID_AXIAW_VALID	BIT(16)
+#define TMC_DEVID_AXIAW_SHIFT	17
+#define TMC_DEVID_AXIAW_MASK	0x7f
+
+#define TMC_AUTH_NSID_MASK	GENMASK(1, 0)
+
+enum tmc_config_type {
+	TMC_CONFIG_TYPE_ETB,
+	TMC_CONFIG_TYPE_ETR,
+	TMC_CONFIG_TYPE_ETF,
+};
+
+enum tmc_mode {
+	TMC_MODE_CIRCULAR_BUFFER,
+	TMC_MODE_SOFTWARE_FIFO,
+	TMC_MODE_HARDWARE_FIFO,
+};
+
+enum tmc_mem_intf_width {
+	TMC_MEM_INTF_WIDTH_32BITS	= 1,
+	TMC_MEM_INTF_WIDTH_64BITS	= 2,
+	TMC_MEM_INTF_WIDTH_128BITS	= 4,
+	TMC_MEM_INTF_WIDTH_256BITS	= 8,
+};
+
+/* TMC ETR Capability bit definitions */
+#define TMC_ETR_SG			(0x1U << 0)
+/* ETR has separate read/write cache encodings */
+#define TMC_ETR_AXI_ARCACHE		(0x1U << 1)
+/*
+ * TMC_ETR_SAVE_RESTORE - Values of RRP/RWP/STS.Full are
+ * retained when TMC leaves Disabled state, allowing us to continue
+ * the tracing from a point where we stopped. This also implies that
+ * the RRP/RWP/STS.Full should always be programmed to the correct
+ * value. Unfortunately this is not advertised by the hardware,
+ * so we have to rely on PID of the IP to detect the functionality.
+ */
+#define TMC_ETR_SAVE_RESTORE		(0x1U << 2)
+
+/* Coresight SoC-600 TMC-ETR unadvertised capabilities */
+#define CORESIGHT_SOC_600_ETR_CAPS	\
+	(TMC_ETR_SAVE_RESTORE | TMC_ETR_AXI_ARCACHE)
+
+enum etr_mode {
+	ETR_MODE_FLAT,		/* Uses contiguous flat buffer */
+	ETR_MODE_ETR_SG,	/* Uses in-built TMC ETR SG mechanism */
+	ETR_MODE_CATU,		/* Use SG mechanism in CATU */
+};
+
+struct etr_buf_operations;
+
+/**
+ * struct etr_buf - Details of the buffer used by ETR
+ * refcount	; Number of sources currently using this etr_buf.
+ * @mode	: Mode of the ETR buffer, contiguous, Scatter Gather etc.
+ * @full	: Trace data overflow
+ * @size	: Size of the buffer.
+ * @hwaddr	: Address to be programmed in the TMC:DBA{LO,HI}
+ * @offset	: Offset of the trace data in the buffer for consumption.
+ * @len		: Available trace data @buf (may round up to the beginning).
+ * @ops		: ETR buffer operations for the mode.
+ * @private	: Backend specific information for the buf
+ */
+struct etr_buf {
+	refcount_t			refcount;
+	enum etr_mode			mode;
+	bool				full;
+	ssize_t				size;
+	dma_addr_t			hwaddr;
+	unsigned long			offset;
+	s64				len;
+	const struct etr_buf_operations	*ops;
+	void				*private;
+};
+
+/**
+ * struct tmc_drvdata - specifics associated to an TMC component
+ * @base:	memory mapped base address for this component.
+ * @csdev:	component vitals needed by the framework.
+ * @miscdev:	specifics to handle "/dev/xyz.tmc" entry.
+ * @spinlock:	only one at a time pls.
+ * @pid:	Process ID of the process being monitored by the session
+ *		that is using this component.
+ * @buf:	Snapshot of the trace data for ETF/ETB.
+ * @etr_buf:	details of buffer used in TMC-ETR
+ * @len:	size of the available trace for ETF/ETB.
+ * @size:	trace buffer size for this TMC (common for all modes).
+ * @max_burst_size: The maximum burst size that can be initiated by
+ *		TMC-ETR on AXI bus.
+ * @mode:	how this TMC is being used.
+ * @config_type: TMC variant, must be of type @tmc_config_type.
+ * @memwidth:	width of the memory interface databus, in bytes.
+ * @trigger_cntr: amount of words to store after a trigger.
+ * @etr_caps:	Bitmask of capabilities of the TMC ETR, inferred from the
+ *		device configuration register (DEVID)
+ * @idr:	Holds etr_bufs allocated for this ETR.
+ * @idr_mutex:	Access serialisation for idr.
+ * @sysfs_buf:	SYSFS buffer for ETR.
+ * @perf_buf:	PERF buffer for ETR.
+ */
+struct tmc_drvdata {
+	void __iomem		*base;
+	struct coresight_device	*csdev;
+	struct miscdevice	miscdev;
+	spinlock_t		spinlock;
+	pid_t			pid;
+	bool			reading;
+	union {
+		char		*buf;		/* TMC ETB */
+		struct etr_buf	*etr_buf;	/* TMC ETR */
+	};
+	u32			len;
+	u32			size;
+	u32			max_burst_size;
+	u32			mode;
+	enum tmc_config_type	config_type;
+	enum tmc_mem_intf_width	memwidth;
+	u32			trigger_cntr;
+	u32			etr_caps;
+	struct idr		idr;
+	struct mutex		idr_mutex;
+	struct etr_buf		*sysfs_buf;
+	struct etr_buf		*perf_buf;
+};
+
+struct etr_buf_operations {
+	int (*alloc)(struct tmc_drvdata *drvdata, struct etr_buf *etr_buf,
+		     int node, void **pages);
+	void (*sync)(struct etr_buf *etr_buf, u64 rrp, u64 rwp);
+	ssize_t (*get_data)(struct etr_buf *etr_buf, u64 offset, size_t len,
+			    char **bufpp);
+	void (*free)(struct etr_buf *etr_buf);
+};
+
+/**
+ * struct tmc_pages - Collection of pages used for SG.
+ * @nr_pages:		Number of pages in the list.
+ * @daddrs:		Array of DMA'able page address.
+ * @pages:		Array pages for the buffer.
+ */
+struct tmc_pages {
+	int nr_pages;
+	dma_addr_t	*daddrs;
+	struct page	**pages;
+};
+
+/*
+ * struct tmc_sg_table - Generic SG table for TMC
+ * @dev:		Device for DMA allocations
+ * @table_vaddr:	Contiguous Virtual address for PageTable
+ * @data_vaddr:		Contiguous Virtual address for Data Buffer
+ * @table_daddr:	DMA address of the PageTable base
+ * @node:		Node for Page allocations
+ * @table_pages:	List of pages & dma address for Table
+ * @data_pages:		List of pages & dma address for Data
+ */
+struct tmc_sg_table {
+	struct device *dev;
+	void *table_vaddr;
+	void *data_vaddr;
+	dma_addr_t table_daddr;
+	int node;
+	struct tmc_pages table_pages;
+	struct tmc_pages data_pages;
+};
+
+/* Generic functions */
+int tmc_wait_for_tmcready(struct tmc_drvdata *drvdata);
+void tmc_flush_and_stop(struct tmc_drvdata *drvdata);
+void tmc_enable_hw(struct tmc_drvdata *drvdata);
+void tmc_disable_hw(struct tmc_drvdata *drvdata);
+u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata);
+
+/* ETB/ETF functions */
+int tmc_read_prepare_etb(struct tmc_drvdata *drvdata);
+int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata);
+extern const struct coresight_ops tmc_etb_cs_ops;
+extern const struct coresight_ops tmc_etf_cs_ops;
+
+ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata,
+				loff_t pos, size_t len, char **bufpp);
+/* ETR functions */
+int tmc_read_prepare_etr(struct tmc_drvdata *drvdata);
+int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata);
+void tmc_etr_disable_hw(struct tmc_drvdata *drvdata);
+extern const struct coresight_ops tmc_etr_cs_ops;
+ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
+				loff_t pos, size_t len, char **bufpp);
+
+
+#define TMC_REG_PAIR(name, lo_off, hi_off)				\
+static inline u64							\
+tmc_read_##name(struct tmc_drvdata *drvdata)				\
+{									\
+	return csdev_access_relaxed_read_pair(&drvdata->csdev->access, lo_off, hi_off); \
+}									\
+static inline void							\
+tmc_write_##name(struct tmc_drvdata *drvdata, u64 val)			\
+{									\
+	csdev_access_relaxed_write_pair(&drvdata->csdev->access, val, lo_off, hi_off); \
+}
+
+TMC_REG_PAIR(rrp, TMC_RRP, TMC_RRPHI)
+TMC_REG_PAIR(rwp, TMC_RWP, TMC_RWPHI)
+TMC_REG_PAIR(dba, TMC_DBALO, TMC_DBAHI)
+
+/* Initialise the caps from unadvertised static capabilities of the device */
+static inline void tmc_etr_init_caps(struct tmc_drvdata *drvdata, u32 dev_caps)
+{
+	WARN_ON(drvdata->etr_caps);
+	drvdata->etr_caps = dev_caps;
+}
+
+static inline void tmc_etr_set_cap(struct tmc_drvdata *drvdata, u32 cap)
+{
+	drvdata->etr_caps |= cap;
+}
+
+static inline bool tmc_etr_has_cap(struct tmc_drvdata *drvdata, u32 cap)
+{
+	return !!(drvdata->etr_caps & cap);
+}
+
+struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev,
+					int node,
+					int nr_tpages,
+					int nr_dpages,
+					void **pages);
+void tmc_free_sg_table(struct tmc_sg_table *sg_table);
+void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table);
+void tmc_sg_table_sync_data_range(struct tmc_sg_table *table,
+				  u64 offset, u64 size);
+ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table,
+			      u64 offset, size_t len, char **bufpp);
+static inline unsigned long
+tmc_sg_table_buf_size(struct tmc_sg_table *sg_table)
+{
+	return (unsigned long)sg_table->data_pages.nr_pages << PAGE_SHIFT;
+}
+
+struct coresight_device *tmc_etr_get_catu_device(struct tmc_drvdata *drvdata);
+
+void tmc_etr_set_catu_ops(const struct etr_buf_operations *catu);
+void tmc_etr_remove_catu_ops(void);
+struct etr_buf *tmc_etr_get_buffer(struct coresight_device *csdev,
+				   enum cs_mode mode, void *data);
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c
new file mode 100644
index 0000000000..8d2b9d2923
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tpda.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/bitfield.h>
+#include <linux/coresight.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "coresight-priv.h"
+#include "coresight-tpda.h"
+#include "coresight-trace-id.h"
+
+DEFINE_CORESIGHT_DEVLIST(tpda_devs, "tpda");
+
+/* Settings pre enabling port control register */
+static void tpda_enable_pre_port(struct tpda_drvdata *drvdata)
+{
+	u32 val;
+
+	val = readl_relaxed(drvdata->base + TPDA_CR);
+	val &= ~TPDA_CR_ATID;
+	val |= FIELD_PREP(TPDA_CR_ATID, drvdata->atid);
+	writel_relaxed(val, drvdata->base + TPDA_CR);
+}
+
+static void tpda_enable_port(struct tpda_drvdata *drvdata, int port)
+{
+	u32 val;
+
+	val = readl_relaxed(drvdata->base + TPDA_Pn_CR(port));
+	/* Enable the port */
+	val |= TPDA_Pn_CR_ENA;
+	writel_relaxed(val, drvdata->base + TPDA_Pn_CR(port));
+}
+
+static void __tpda_enable(struct tpda_drvdata *drvdata, int port)
+{
+	CS_UNLOCK(drvdata->base);
+
+	if (!drvdata->csdev->enable)
+		tpda_enable_pre_port(drvdata);
+
+	tpda_enable_port(drvdata, port);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int tpda_enable(struct coresight_device *csdev,
+		       struct coresight_connection *in,
+		       struct coresight_connection *out)
+{
+	struct tpda_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock(&drvdata->spinlock);
+	if (atomic_read(&in->dest_refcnt) == 0)
+		__tpda_enable(drvdata, in->dest_port);
+
+	atomic_inc(&in->dest_refcnt);
+	spin_unlock(&drvdata->spinlock);
+
+	dev_dbg(drvdata->dev, "TPDA inport %d enabled.\n", in->dest_port);
+	return 0;
+}
+
+static void __tpda_disable(struct tpda_drvdata *drvdata, int port)
+{
+	u32 val;
+
+	CS_UNLOCK(drvdata->base);
+
+	val = readl_relaxed(drvdata->base + TPDA_Pn_CR(port));
+	val &= ~TPDA_Pn_CR_ENA;
+	writel_relaxed(val, drvdata->base + TPDA_Pn_CR(port));
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tpda_disable(struct coresight_device *csdev,
+			 struct coresight_connection *in,
+			 struct coresight_connection *out)
+{
+	struct tpda_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock(&drvdata->spinlock);
+	if (atomic_dec_return(&in->dest_refcnt) == 0)
+		__tpda_disable(drvdata, in->dest_port);
+
+	spin_unlock(&drvdata->spinlock);
+
+	dev_dbg(drvdata->dev, "TPDA inport %d disabled\n", in->dest_port);
+}
+
+static const struct coresight_ops_link tpda_link_ops = {
+	.enable		= tpda_enable,
+	.disable	= tpda_disable,
+};
+
+static const struct coresight_ops tpda_cs_ops = {
+	.link_ops	= &tpda_link_ops,
+};
+
+static int tpda_init_default_data(struct tpda_drvdata *drvdata)
+{
+	int atid;
+	/*
+	 * TPDA must has a unique atid. This atid can uniquely
+	 * identify the TPDM trace source connected to the TPDA.
+	 * The TPDMs which are connected to same TPDA share the
+	 * same trace-id. When TPDA does packetization, different
+	 * port will have unique channel number for decoding.
+	 */
+	atid = coresight_trace_id_get_system_id();
+	if (atid < 0)
+		return atid;
+
+	drvdata->atid = atid;
+	return 0;
+}
+
+static int tpda_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata;
+	struct tpda_drvdata *drvdata;
+	struct coresight_desc desc = { 0 };
+	void __iomem *base;
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+	adev->dev.platform_data = pdata;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	base = devm_ioremap_resource(dev, &adev->res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	ret = tpda_init_default_data(drvdata);
+	if (ret)
+		return ret;
+
+	desc.name = coresight_alloc_device_name(&tpda_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
+	desc.ops = &tpda_cs_ops;
+	desc.pdata = adev->dev.platform_data;
+	desc.dev = &adev->dev;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	pm_runtime_put(&adev->dev);
+
+	dev_dbg(drvdata->dev, "TPDA initialized\n");
+	return 0;
+}
+
+static void tpda_remove(struct amba_device *adev)
+{
+	struct tpda_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	coresight_trace_id_put_system_id(drvdata->atid);
+	coresight_unregister(drvdata->csdev);
+}
+
+/*
+ * Different TPDA has different periph id.
+ * The difference is 0-7 bits' value. So ignore 0-7 bits.
+ */
+static struct amba_id tpda_ids[] = {
+	{
+		.id     = 0x000f0f00,
+		.mask   = 0x000fff00,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver tpda_driver = {
+	.drv = {
+		.name   = "coresight-tpda",
+		.owner	= THIS_MODULE,
+		.suppress_bind_attrs = true,
+	},
+	.probe          = tpda_probe,
+	.remove		= tpda_remove,
+	.id_table	= tpda_ids,
+};
+
+module_amba_driver(tpda_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Trace, Profiling & Diagnostic Aggregator driver");
diff --git a/drivers/hwtracing/coresight/coresight-tpda.h b/drivers/hwtracing/coresight/coresight-tpda.h
new file mode 100644
index 0000000000..0399678df3
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tpda.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_TPDA_H
+#define _CORESIGHT_CORESIGHT_TPDA_H
+
+#define TPDA_CR			(0x000)
+#define TPDA_Pn_CR(n)		(0x004 + (n * 4))
+/* Aggregator port enable bit */
+#define TPDA_Pn_CR_ENA		BIT(0)
+
+#define TPDA_MAX_INPORTS	32
+
+/* Bits 6 ~ 12 is for atid value */
+#define TPDA_CR_ATID		GENMASK(12, 6)
+
+/**
+ * struct tpda_drvdata - specifics associated to an TPDA component
+ * @base:       memory mapped base address for this component.
+ * @dev:        The device entity associated to this component.
+ * @csdev:      component vitals needed by the framework.
+ * @spinlock:   lock for the drvdata value.
+ * @enable:     enable status of the component.
+ */
+struct tpda_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct coresight_device	*csdev;
+	spinlock_t		spinlock;
+	u8			atid;
+};
+
+#endif  /* _CORESIGHT_CORESIGHT_TPDA_H */
diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c
new file mode 100644
index 0000000000..f4854af043
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tpdm.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/bitmap.h>
+#include <linux/coresight.h>
+#include <linux/coresight-pmu.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include "coresight-priv.h"
+#include "coresight-tpdm.h"
+
+DEFINE_CORESIGHT_DEVLIST(tpdm_devs, "tpdm");
+
+static void tpdm_enable_dsb(struct tpdm_drvdata *drvdata)
+{
+	u32 val;
+
+	/* Set the enable bit of DSB control register to 1 */
+	val = readl_relaxed(drvdata->base + TPDM_DSB_CR);
+	val |= TPDM_DSB_CR_ENA;
+	writel_relaxed(val, drvdata->base + TPDM_DSB_CR);
+}
+
+/* TPDM enable operations */
+static void __tpdm_enable(struct tpdm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	/* Check if DSB datasets is present for TPDM. */
+	if (drvdata->datasets & TPDM_PIDR0_DS_DSB)
+		tpdm_enable_dsb(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int tpdm_enable(struct coresight_device *csdev, struct perf_event *event,
+		       enum cs_mode mode)
+{
+	struct tpdm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock(&drvdata->spinlock);
+	if (drvdata->enable) {
+		spin_unlock(&drvdata->spinlock);
+		return -EBUSY;
+	}
+
+	__tpdm_enable(drvdata);
+	drvdata->enable = true;
+	spin_unlock(&drvdata->spinlock);
+
+	dev_dbg(drvdata->dev, "TPDM tracing enabled\n");
+	return 0;
+}
+
+static void tpdm_disable_dsb(struct tpdm_drvdata *drvdata)
+{
+	u32 val;
+
+	/* Set the enable bit of DSB control register to 0 */
+	val = readl_relaxed(drvdata->base + TPDM_DSB_CR);
+	val &= ~TPDM_DSB_CR_ENA;
+	writel_relaxed(val, drvdata->base + TPDM_DSB_CR);
+}
+
+/* TPDM disable operations */
+static void __tpdm_disable(struct tpdm_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	/* Check if DSB datasets is present for TPDM. */
+	if (drvdata->datasets & TPDM_PIDR0_DS_DSB)
+		tpdm_disable_dsb(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tpdm_disable(struct coresight_device *csdev,
+			 struct perf_event *event)
+{
+	struct tpdm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	spin_lock(&drvdata->spinlock);
+	if (!drvdata->enable) {
+		spin_unlock(&drvdata->spinlock);
+		return;
+	}
+
+	__tpdm_disable(drvdata);
+	drvdata->enable = false;
+	spin_unlock(&drvdata->spinlock);
+
+	dev_dbg(drvdata->dev, "TPDM tracing disabled\n");
+}
+
+static const struct coresight_ops_source tpdm_source_ops = {
+	.enable		= tpdm_enable,
+	.disable	= tpdm_disable,
+};
+
+static const struct coresight_ops tpdm_cs_ops = {
+	.source_ops	= &tpdm_source_ops,
+};
+
+static void tpdm_init_default_data(struct tpdm_drvdata *drvdata)
+{
+	u32 pidr;
+
+	CS_UNLOCK(drvdata->base);
+	/*  Get the datasets present on the TPDM. */
+	pidr = readl_relaxed(drvdata->base + CORESIGHT_PERIPHIDR0);
+	drvdata->datasets |= pidr & GENMASK(TPDM_DATASETS - 1, 0);
+	CS_LOCK(drvdata->base);
+}
+
+/*
+ * value 1: 64 bits test data
+ * value 2: 32 bits test data
+ */
+static ssize_t integration_test_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf,
+					  size_t size)
+{
+	int i, ret = 0;
+	unsigned long val;
+	struct tpdm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val != 1 && val != 2)
+		return -EINVAL;
+
+	if (!drvdata->enable)
+		return -EINVAL;
+
+	if (val == 1)
+		val = ATBCNTRL_VAL_64;
+	else
+		val = ATBCNTRL_VAL_32;
+	CS_UNLOCK(drvdata->base);
+	writel_relaxed(0x1, drvdata->base + TPDM_ITCNTRL);
+
+	for (i = 0; i < INTEGRATION_TEST_CYCLE; i++)
+		writel_relaxed(val, drvdata->base + TPDM_ITATBCNTRL);
+
+	writel_relaxed(0, drvdata->base + TPDM_ITCNTRL);
+	CS_LOCK(drvdata->base);
+	return size;
+}
+static DEVICE_ATTR_WO(integration_test);
+
+static struct attribute *tpdm_attrs[] = {
+	&dev_attr_integration_test.attr,
+	NULL,
+};
+
+static struct attribute_group tpdm_attr_grp = {
+	.attrs = tpdm_attrs,
+};
+
+static const struct attribute_group *tpdm_attr_grps[] = {
+	&tpdm_attr_grp,
+	NULL,
+};
+
+static int tpdm_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata;
+	struct tpdm_drvdata *drvdata;
+	struct coresight_desc desc = { 0 };
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+	adev->dev.platform_data = pdata;
+
+	/* driver data*/
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	base = devm_ioremap_resource(dev, &adev->res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	/* Set up coresight component description */
+	desc.name = coresight_alloc_device_name(&tpdm_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS;
+	desc.ops = &tpdm_cs_ops;
+	desc.pdata = adev->dev.platform_data;
+	desc.dev = &adev->dev;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
+	desc.groups = tpdm_attr_grps;
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	spin_lock_init(&drvdata->spinlock);
+	tpdm_init_default_data(drvdata);
+	/* Decrease pm refcount when probe is done.*/
+	pm_runtime_put(&adev->dev);
+
+	return 0;
+}
+
+static void tpdm_remove(struct amba_device *adev)
+{
+	struct tpdm_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	coresight_unregister(drvdata->csdev);
+}
+
+/*
+ * Different TPDM has different periph id.
+ * The difference is 0-7 bits' value. So ignore 0-7 bits.
+ */
+static struct amba_id tpdm_ids[] = {
+	{
+		.id = 0x000f0e00,
+		.mask = 0x000fff00,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver tpdm_driver = {
+	.drv = {
+		.name   = "coresight-tpdm",
+		.owner	= THIS_MODULE,
+		.suppress_bind_attrs = true,
+	},
+	.probe          = tpdm_probe,
+	.id_table	= tpdm_ids,
+	.remove		= tpdm_remove,
+};
+
+module_amba_driver(tpdm_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Trace, Profiling & Diagnostic Monitor driver");
diff --git a/drivers/hwtracing/coresight/coresight-tpdm.h b/drivers/hwtracing/coresight/coresight-tpdm.h
new file mode 100644
index 0000000000..543854043a
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tpdm.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_TPDM_H
+#define _CORESIGHT_CORESIGHT_TPDM_H
+
+/* The max number of the datasets that TPDM supports */
+#define TPDM_DATASETS       7
+
+/* DSB Subunit Registers */
+#define TPDM_DSB_CR		(0x780)
+/* Enable bit for DSB subunit */
+#define TPDM_DSB_CR_ENA		BIT(0)
+
+/* TPDM integration test registers */
+#define TPDM_ITATBCNTRL		(0xEF0)
+#define TPDM_ITCNTRL		(0xF00)
+
+/* Register value for integration test */
+#define ATBCNTRL_VAL_32		0xC00F1409
+#define ATBCNTRL_VAL_64		0xC01F1409
+
+/*
+ * Number of cycles to write value when
+ * integration test.
+ */
+#define INTEGRATION_TEST_CYCLE	10
+
+/**
+ * The bits of PERIPHIDR0 register.
+ * The fields [6:0] of PERIPHIDR0 are used to determine what
+ * interfaces and subunits are present on a given TPDM.
+ *
+ * PERIPHIDR0[0] : Fix to 1 if ImplDef subunit present, else 0
+ * PERIPHIDR0[1] : Fix to 1 if DSB subunit present, else 0
+ */
+
+#define TPDM_PIDR0_DS_IMPDEF	BIT(0)
+#define TPDM_PIDR0_DS_DSB	BIT(1)
+
+/**
+ * struct tpdm_drvdata - specifics associated to an TPDM component
+ * @base:       memory mapped base address for this component.
+ * @dev:        The device entity associated to this component.
+ * @csdev:      component vitals needed by the framework.
+ * @spinlock:   lock for the drvdata value.
+ * @enable:     enable status of the component.
+ * @datasets:   The datasets types present of the TPDM.
+ */
+
+struct tpdm_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct coresight_device	*csdev;
+	spinlock_t		spinlock;
+	bool			enable;
+	unsigned long		datasets;
+};
+
+#endif  /* _CORESIGHT_CORESIGHT_TPDM_H */
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
new file mode 100644
index 0000000000..59eac93fd6
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * Description: CoreSight Trace Port Interface Unit driver
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
+
+#include "coresight-priv.h"
+
+#define TPIU_SUPP_PORTSZ	0x000
+#define TPIU_CURR_PORTSZ	0x004
+#define TPIU_SUPP_TRIGMODES	0x100
+#define TPIU_TRIG_CNTRVAL	0x104
+#define TPIU_TRIG_MULT		0x108
+#define TPIU_SUPP_TESTPATM	0x200
+#define TPIU_CURR_TESTPATM	0x204
+#define TPIU_TEST_PATREPCNTR	0x208
+#define TPIU_FFSR		0x300
+#define TPIU_FFCR		0x304
+#define TPIU_FSYNC_CNTR		0x308
+#define TPIU_EXTCTL_INPORT	0x400
+#define TPIU_EXTCTL_OUTPORT	0x404
+#define TPIU_ITTRFLINACK	0xee4
+#define TPIU_ITTRFLIN		0xee8
+#define TPIU_ITATBDATA0		0xeec
+#define TPIU_ITATBCTR2		0xef0
+#define TPIU_ITATBCTR1		0xef4
+#define TPIU_ITATBCTR0		0xef8
+
+/** register definition **/
+/* FFSR - 0x300 */
+#define FFSR_FT_STOPPED_BIT	1
+/* FFCR - 0x304 */
+#define FFCR_FON_MAN_BIT	6
+#define FFCR_FON_MAN		BIT(6)
+#define FFCR_STOP_FI		BIT(12)
+
+DEFINE_CORESIGHT_DEVLIST(tpiu_devs, "tpiu");
+
+/*
+ * @base:	memory mapped base address for this component.
+ * @atclk:	optional clock for the core parts of the TPIU.
+ * @csdev:	component vitals needed by the framework.
+ */
+struct tpiu_drvdata {
+	void __iomem		*base;
+	struct clk		*atclk;
+	struct coresight_device	*csdev;
+};
+
+static void tpiu_enable_hw(struct csdev_access *csa)
+{
+	CS_UNLOCK(csa->base);
+
+	/* TODO: fill this up */
+
+	CS_LOCK(csa->base);
+}
+
+static int tpiu_enable(struct coresight_device *csdev, enum cs_mode mode,
+		       void *__unused)
+{
+	tpiu_enable_hw(&csdev->access);
+	atomic_inc(&csdev->refcnt);
+	dev_dbg(&csdev->dev, "TPIU enabled\n");
+	return 0;
+}
+
+static void tpiu_disable_hw(struct csdev_access *csa)
+{
+	CS_UNLOCK(csa->base);
+
+	/* Clear formatter and stop on flush */
+	csdev_access_relaxed_write32(csa, FFCR_STOP_FI, TPIU_FFCR);
+	/* Generate manual flush */
+	csdev_access_relaxed_write32(csa, FFCR_STOP_FI | FFCR_FON_MAN, TPIU_FFCR);
+	/* Wait for flush to complete */
+	coresight_timeout(csa, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
+	/* Wait for formatter to stop */
+	coresight_timeout(csa, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
+
+	CS_LOCK(csa->base);
+}
+
+static int tpiu_disable(struct coresight_device *csdev)
+{
+	if (atomic_dec_return(&csdev->refcnt))
+		return -EBUSY;
+
+	tpiu_disable_hw(&csdev->access);
+
+	dev_dbg(&csdev->dev, "TPIU disabled\n");
+	return 0;
+}
+
+static const struct coresight_ops_sink tpiu_sink_ops = {
+	.enable		= tpiu_enable,
+	.disable	= tpiu_disable,
+};
+
+static const struct coresight_ops tpiu_cs_ops = {
+	.sink_ops	= &tpiu_sink_ops,
+};
+
+static int tpiu_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct tpiu_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc desc = { 0 };
+
+	desc.name = coresight_alloc_device_name(&tpiu_devs, dev);
+	if (!desc.name)
+		return -ENOMEM;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */
+	if (!IS_ERR(drvdata->atclk)) {
+		ret = clk_prepare_enable(drvdata->atclk);
+		if (ret)
+			return ret;
+	}
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
+
+	/* Disable tpiu to support older devices */
+	tpiu_disable_hw(&desc.access);
+
+	pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+	dev->platform_data = pdata;
+
+	desc.type = CORESIGHT_DEV_TYPE_SINK;
+	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT;
+	desc.ops = &tpiu_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	drvdata->csdev = coresight_register(&desc);
+
+	if (!IS_ERR(drvdata->csdev)) {
+		pm_runtime_put(&adev->dev);
+		return 0;
+	}
+
+	return PTR_ERR(drvdata->csdev);
+}
+
+static void tpiu_remove(struct amba_device *adev)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	coresight_unregister(drvdata->csdev);
+}
+
+#ifdef CONFIG_PM
+static int tpiu_runtime_suspend(struct device *dev)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_disable_unprepare(drvdata->atclk);
+
+	return 0;
+}
+
+static int tpiu_runtime_resume(struct device *dev)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (drvdata && !IS_ERR(drvdata->atclk))
+		clk_prepare_enable(drvdata->atclk);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tpiu_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(tpiu_runtime_suspend, tpiu_runtime_resume, NULL)
+};
+
+static const struct amba_id tpiu_ids[] = {
+	{
+		.id	= 0x000bb912,
+		.mask	= 0x000fffff,
+	},
+	{
+		.id	= 0x0004b912,
+		.mask	= 0x0007ffff,
+	},
+	{
+		/* Coresight SoC-600 */
+		.id	= 0x000bb9e7,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0},
+};
+
+MODULE_DEVICE_TABLE(amba, tpiu_ids);
+
+static struct amba_driver tpiu_driver = {
+	.drv = {
+		.name	= "coresight-tpiu",
+		.owner	= THIS_MODULE,
+		.pm	= &tpiu_dev_pm_ops,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= tpiu_probe,
+	.remove         = tpiu_remove,
+	.id_table	= tpiu_ids,
+};
+
+module_amba_driver(tpiu_driver);
+
+MODULE_AUTHOR("Pratik Patel <pratikp@codeaurora.org>");
+MODULE_AUTHOR("Mathieu Poirier <mathieu.poirier@linaro.org>");
+MODULE_DESCRIPTION("Arm CoreSight TPIU (Trace Port Interface Unit) driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
new file mode 100644
index 0000000000..af5b4ef59c
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-trace-id.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022, Linaro Limited, All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+#include <linux/coresight-pmu.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "coresight-trace-id.h"
+
+/* Default trace ID map. Used on systems that don't require per sink mappings */
+static struct coresight_trace_id_map id_map_default;
+
+/* maintain a record of the mapping of IDs and pending releases per cpu */
+static DEFINE_PER_CPU(atomic_t, cpu_id) = ATOMIC_INIT(0);
+static cpumask_t cpu_id_release_pending;
+
+/* perf session active counter */
+static atomic_t perf_cs_etm_session_active = ATOMIC_INIT(0);
+
+/* lock to protect id_map and cpu data  */
+static DEFINE_SPINLOCK(id_map_lock);
+
+/* #define TRACE_ID_DEBUG 1 */
+#if defined(TRACE_ID_DEBUG) || defined(CONFIG_COMPILE_TEST)
+
+static void coresight_trace_id_dump_table(struct coresight_trace_id_map *id_map,
+					  const char *func_name)
+{
+	pr_debug("%s id_map::\n", func_name);
+	pr_debug("Used = %*pb\n", CORESIGHT_TRACE_IDS_MAX, id_map->used_ids);
+	pr_debug("Pend = %*pb\n", CORESIGHT_TRACE_IDS_MAX, id_map->pend_rel_ids);
+}
+#define DUMP_ID_MAP(map)   coresight_trace_id_dump_table(map, __func__)
+#define DUMP_ID_CPU(cpu, id) pr_debug("%s called;  cpu=%d, id=%d\n", __func__, cpu, id)
+#define DUMP_ID(id)   pr_debug("%s called; id=%d\n", __func__, id)
+#define PERF_SESSION(n) pr_debug("%s perf count %d\n", __func__, n)
+#else
+#define DUMP_ID_MAP(map)
+#define DUMP_ID(id)
+#define DUMP_ID_CPU(cpu, id)
+#define PERF_SESSION(n)
+#endif
+
+/* unlocked read of current trace ID value for given CPU */
+static int _coresight_trace_id_read_cpu_id(int cpu)
+{
+	return atomic_read(&per_cpu(cpu_id, cpu));
+}
+
+/* look for next available odd ID, return 0 if none found */
+static int coresight_trace_id_find_odd_id(struct coresight_trace_id_map *id_map)
+{
+	int found_id = 0, bit = 1, next_id;
+
+	while ((bit < CORESIGHT_TRACE_ID_RES_TOP) && !found_id) {
+		/*
+		 * bitmap length of CORESIGHT_TRACE_ID_RES_TOP,
+		 * search from offset `bit`.
+		 */
+		next_id = find_next_zero_bit(id_map->used_ids,
+					     CORESIGHT_TRACE_ID_RES_TOP, bit);
+		if ((next_id < CORESIGHT_TRACE_ID_RES_TOP) && (next_id & 0x1))
+			found_id = next_id;
+		else
+			bit = next_id + 1;
+	}
+	return found_id;
+}
+
+/*
+ * Allocate new ID and set in use
+ *
+ * if @preferred_id is a valid id then try to use that value if available.
+ * if @preferred_id is not valid and @prefer_odd_id is true, try for odd id.
+ *
+ * Otherwise allocate next available ID.
+ */
+static int coresight_trace_id_alloc_new_id(struct coresight_trace_id_map *id_map,
+					   int preferred_id, bool prefer_odd_id)
+{
+	int id = 0;
+
+	/* for backwards compatibility, cpu IDs may use preferred value */
+	if (IS_VALID_CS_TRACE_ID(preferred_id) &&
+	    !test_bit(preferred_id, id_map->used_ids)) {
+		id = preferred_id;
+		goto trace_id_allocated;
+	} else if (prefer_odd_id) {
+	/* may use odd ids to avoid preferred legacy cpu IDs */
+		id = coresight_trace_id_find_odd_id(id_map);
+		if (id)
+			goto trace_id_allocated;
+	}
+
+	/*
+	 * skip reserved bit 0, look at bitmap length of
+	 * CORESIGHT_TRACE_ID_RES_TOP from offset of bit 1.
+	 */
+	id = find_next_zero_bit(id_map->used_ids, CORESIGHT_TRACE_ID_RES_TOP, 1);
+	if (id >= CORESIGHT_TRACE_ID_RES_TOP)
+		return -EINVAL;
+
+	/* mark as used */
+trace_id_allocated:
+	set_bit(id, id_map->used_ids);
+	return id;
+}
+
+static void coresight_trace_id_free(int id, struct coresight_trace_id_map *id_map)
+{
+	if (WARN(!IS_VALID_CS_TRACE_ID(id), "Invalid Trace ID %d\n", id))
+		return;
+	if (WARN(!test_bit(id, id_map->used_ids), "Freeing unused ID %d\n", id))
+		return;
+	clear_bit(id, id_map->used_ids);
+}
+
+static void coresight_trace_id_set_pend_rel(int id, struct coresight_trace_id_map *id_map)
+{
+	if (WARN(!IS_VALID_CS_TRACE_ID(id), "Invalid Trace ID %d\n", id))
+		return;
+	set_bit(id, id_map->pend_rel_ids);
+}
+
+/*
+ * release all pending IDs for all current maps & clear CPU associations
+ *
+ * This currently operates on the default id map, but may be extended to
+ * operate on all registered id maps if per sink id maps are used.
+ */
+static void coresight_trace_id_release_all_pending(void)
+{
+	struct coresight_trace_id_map *id_map = &id_map_default;
+	unsigned long flags;
+	int cpu, bit;
+
+	spin_lock_irqsave(&id_map_lock, flags);
+	for_each_set_bit(bit, id_map->pend_rel_ids, CORESIGHT_TRACE_ID_RES_TOP) {
+		clear_bit(bit, id_map->used_ids);
+		clear_bit(bit, id_map->pend_rel_ids);
+	}
+	for_each_cpu(cpu, &cpu_id_release_pending) {
+		atomic_set(&per_cpu(cpu_id, cpu), 0);
+		cpumask_clear_cpu(cpu, &cpu_id_release_pending);
+	}
+	spin_unlock_irqrestore(&id_map_lock, flags);
+	DUMP_ID_MAP(id_map);
+}
+
+static int coresight_trace_id_map_get_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
+{
+	unsigned long flags;
+	int id;
+
+	spin_lock_irqsave(&id_map_lock, flags);
+
+	/* check for existing allocation for this CPU */
+	id = _coresight_trace_id_read_cpu_id(cpu);
+	if (id)
+		goto get_cpu_id_clr_pend;
+
+	/*
+	 * Find a new ID.
+	 *
+	 * Use legacy values where possible in the dynamic trace ID allocator to
+	 * allow older tools to continue working if they are not upgraded at the
+	 * same time as the kernel drivers.
+	 *
+	 * If the generated legacy ID is invalid, or not available then the next
+	 * available dynamic ID will be used.
+	 */
+	id = coresight_trace_id_alloc_new_id(id_map,
+					     CORESIGHT_LEGACY_CPU_TRACE_ID(cpu),
+					     false);
+	if (!IS_VALID_CS_TRACE_ID(id))
+		goto get_cpu_id_out_unlock;
+
+	/* allocate the new id to the cpu */
+	atomic_set(&per_cpu(cpu_id, cpu), id);
+
+get_cpu_id_clr_pend:
+	/* we are (re)using this ID - so ensure it is not marked for release */
+	cpumask_clear_cpu(cpu, &cpu_id_release_pending);
+	clear_bit(id, id_map->pend_rel_ids);
+
+get_cpu_id_out_unlock:
+	spin_unlock_irqrestore(&id_map_lock, flags);
+
+	DUMP_ID_CPU(cpu, id);
+	DUMP_ID_MAP(id_map);
+	return id;
+}
+
+static void coresight_trace_id_map_put_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
+{
+	unsigned long flags;
+	int id;
+
+	/* check for existing allocation for this CPU */
+	id = _coresight_trace_id_read_cpu_id(cpu);
+	if (!id)
+		return;
+
+	spin_lock_irqsave(&id_map_lock, flags);
+
+	if (atomic_read(&perf_cs_etm_session_active)) {
+		/* set release at pending if perf still active */
+		coresight_trace_id_set_pend_rel(id, id_map);
+		cpumask_set_cpu(cpu, &cpu_id_release_pending);
+	} else {
+		/* otherwise clear id */
+		coresight_trace_id_free(id, id_map);
+		atomic_set(&per_cpu(cpu_id, cpu), 0);
+	}
+
+	spin_unlock_irqrestore(&id_map_lock, flags);
+	DUMP_ID_CPU(cpu, id);
+	DUMP_ID_MAP(id_map);
+}
+
+static int coresight_trace_id_map_get_system_id(struct coresight_trace_id_map *id_map)
+{
+	unsigned long flags;
+	int id;
+
+	spin_lock_irqsave(&id_map_lock, flags);
+	/* prefer odd IDs for system components to avoid legacy CPU IDS */
+	id = coresight_trace_id_alloc_new_id(id_map, 0, true);
+	spin_unlock_irqrestore(&id_map_lock, flags);
+
+	DUMP_ID(id);
+	DUMP_ID_MAP(id_map);
+	return id;
+}
+
+static void coresight_trace_id_map_put_system_id(struct coresight_trace_id_map *id_map, int id)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&id_map_lock, flags);
+	coresight_trace_id_free(id, id_map);
+	spin_unlock_irqrestore(&id_map_lock, flags);
+
+	DUMP_ID(id);
+	DUMP_ID_MAP(id_map);
+}
+
+/* API functions */
+
+int coresight_trace_id_get_cpu_id(int cpu)
+{
+	return coresight_trace_id_map_get_cpu_id(cpu, &id_map_default);
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_get_cpu_id);
+
+void coresight_trace_id_put_cpu_id(int cpu)
+{
+	coresight_trace_id_map_put_cpu_id(cpu, &id_map_default);
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_put_cpu_id);
+
+int coresight_trace_id_read_cpu_id(int cpu)
+{
+	return _coresight_trace_id_read_cpu_id(cpu);
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_read_cpu_id);
+
+int coresight_trace_id_get_system_id(void)
+{
+	return coresight_trace_id_map_get_system_id(&id_map_default);
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_get_system_id);
+
+void coresight_trace_id_put_system_id(int id)
+{
+	coresight_trace_id_map_put_system_id(&id_map_default, id);
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_put_system_id);
+
+void coresight_trace_id_perf_start(void)
+{
+	atomic_inc(&perf_cs_etm_session_active);
+	PERF_SESSION(atomic_read(&perf_cs_etm_session_active));
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_perf_start);
+
+void coresight_trace_id_perf_stop(void)
+{
+	if (!atomic_dec_return(&perf_cs_etm_session_active))
+		coresight_trace_id_release_all_pending();
+	PERF_SESSION(atomic_read(&perf_cs_etm_session_active));
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_perf_stop);
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.h b/drivers/hwtracing/coresight/coresight-trace-id.h
new file mode 100644
index 0000000000..3797777d36
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-trace-id.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2022 Linaro Limited. All rights reserved.
+ * Author: Mike Leach <mike.leach@linaro.org>
+ */
+
+#ifndef _CORESIGHT_TRACE_ID_H
+#define _CORESIGHT_TRACE_ID_H
+
+/*
+ * Coresight trace ID allocation API
+ *
+ * With multi cpu systems, and more additional trace sources a scalable
+ * trace ID reservation system is required.
+ *
+ * The system will allocate Ids on a demand basis, and allow them to be
+ * released when done.
+ *
+ * In order to ensure that a consistent cpu / ID matching is maintained
+ * throughout a perf cs_etm event session - a session in progress flag will
+ * be maintained, and released IDs not cleared until the perf session is
+ * complete. This allows the same CPU to be re-allocated its prior ID.
+ *
+ *
+ * Trace ID maps will be created and initialised to prevent architecturally
+ * reserved IDs from being allocated.
+ *
+ * API permits multiple maps to be maintained - for large systems where
+ * different sets of cpus trace into different independent sinks.
+ */
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+
+
+/* architecturally we have 128 IDs some of which are reserved */
+#define CORESIGHT_TRACE_IDS_MAX 128
+
+/* ID 0 is reserved */
+#define CORESIGHT_TRACE_ID_RES_0 0
+
+/* ID 0x70 onwards are reserved */
+#define CORESIGHT_TRACE_ID_RES_TOP 0x70
+
+/* check an ID is in the valid range */
+#define IS_VALID_CS_TRACE_ID(id)	\
+	((id > CORESIGHT_TRACE_ID_RES_0) && (id < CORESIGHT_TRACE_ID_RES_TOP))
+
+/**
+ * Trace ID map.
+ *
+ * @used_ids:	Bitmap to register available (bit = 0) and in use (bit = 1) IDs.
+ *		Initialised so that the reserved IDs are permanently marked as
+ *		in use.
+ * @pend_rel_ids: CPU IDs that have been released by the trace source but not
+ *		  yet marked as available, to allow re-allocation to the same
+ *		  CPU during a perf session.
+ */
+struct coresight_trace_id_map {
+	DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
+	DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX);
+};
+
+/* Allocate and release IDs for a single default trace ID map */
+
+/**
+ * Read and optionally allocate a CoreSight trace ID and associate with a CPU.
+ *
+ * Function will read the current trace ID for the associated CPU,
+ * allocating an new ID if one is not currently allocated.
+ *
+ * Numeric ID values allocated use legacy allocation algorithm if possible,
+ * otherwise any available ID is used.
+ *
+ * @cpu: The CPU index to allocate for.
+ *
+ * return: CoreSight trace ID or -EINVAL if allocation impossible.
+ */
+int coresight_trace_id_get_cpu_id(int cpu);
+
+/**
+ * Release an allocated trace ID associated with the CPU.
+ *
+ * This will release the CoreSight trace ID associated with the CPU,
+ * unless a perf session is in operation.
+ *
+ * If a perf session is in operation then the ID will be marked as pending
+ * release.
+ *
+ * @cpu: The CPU index to release the associated trace ID.
+ */
+void coresight_trace_id_put_cpu_id(int cpu);
+
+/**
+ * Read the current allocated CoreSight Trace ID value for the CPU.
+ *
+ * Fast read of the current value that does not allocate if no ID allocated
+ * for the CPU.
+ *
+ * Used in perf context  where it is known that the value for the CPU will not
+ * be changing, when perf starts and event on a core and outputs the Trace ID
+ * for the CPU as a packet in the data file. IDs cannot change during a perf
+ * session.
+ *
+ * This function does not take the lock protecting the ID lists, avoiding
+ * locking dependency issues with perf locks.
+ *
+ * @cpu: The CPU index to read.
+ *
+ * return: current value, will be 0 if unallocated.
+ */
+int coresight_trace_id_read_cpu_id(int cpu);
+
+/**
+ * Allocate a CoreSight trace ID for a system component.
+ *
+ * Unconditionally allocates a Trace ID, without associating the ID with a CPU.
+ *
+ * Used to allocate IDs for system trace sources such as STM.
+ *
+ * return: Trace ID or -EINVAL if allocation is impossible.
+ */
+int coresight_trace_id_get_system_id(void);
+
+/**
+ * Release an allocated system trace ID.
+ *
+ * Unconditionally release a trace ID allocated to a system component.
+ *
+ * @id: value of trace ID allocated.
+ */
+void coresight_trace_id_put_system_id(int id);
+
+/* notifiers for perf session start and stop */
+
+/**
+ * Notify the Trace ID allocator that a perf session is starting.
+ *
+ * Increase the perf session reference count - called by perf when setting up
+ * a trace event.
+ *
+ * This reference count is used by the ID allocator to ensure that trace IDs
+ * associated with a CPU cannot change or be released during a perf session.
+ */
+void coresight_trace_id_perf_start(void);
+
+/**
+ * Notify the ID allocator that a perf session is stopping.
+ *
+ * Decrease the perf session reference count.
+ * if this causes the count to go to zero, then all Trace IDs marked as pending
+ * release, will be released.
+ */
+void coresight_trace_id_perf_stop(void);
+
+#endif /* _CORESIGHT_TRACE_ID_H */
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
new file mode 100644
index 0000000000..e20c1c6acc
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -0,0 +1,1570 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This driver enables Trace Buffer Extension (TRBE) as a per-cpu coresight
+ * sink device could then pair with an appropriate per-cpu coresight source
+ * device (ETE) thus generating required trace data. Trace can be enabled
+ * via the perf framework.
+ *
+ * The AUX buffer handling is inspired from Arm SPE PMU driver.
+ *
+ * Copyright (C) 2020 ARM Ltd.
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+#define DRVNAME "arm_trbe"
+
+#define pr_fmt(fmt) DRVNAME ": " fmt
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+
+#include "coresight-self-hosted-trace.h"
+#include "coresight-trbe.h"
+
+#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+
+/*
+ * A padding packet that will help the user space tools
+ * in skipping relevant sections in the captured trace
+ * data which could not be decoded. TRBE doesn't support
+ * formatting the trace data, unlike the legacy CoreSight
+ * sinks and thus we use ETE trace packets to pad the
+ * sections of the buffer.
+ */
+#define ETE_IGNORE_PACKET		0x70
+
+/*
+ * Minimum amount of meaningful trace will contain:
+ * A-Sync, Trace Info, Trace On, Address, Atom.
+ * This is about 44bytes of ETE trace. To be on
+ * the safer side, we assume 64bytes is the minimum
+ * space required for a meaningful session, before
+ * we hit a "WRAP" event.
+ */
+#define TRBE_TRACE_MIN_BUF_SIZE		64
+
+enum trbe_fault_action {
+	TRBE_FAULT_ACT_WRAP,
+	TRBE_FAULT_ACT_SPURIOUS,
+	TRBE_FAULT_ACT_FATAL,
+};
+
+struct trbe_buf {
+	/*
+	 * Even though trbe_base represents vmap()
+	 * mapped allocated buffer's start address,
+	 * it's being as unsigned long for various
+	 * arithmetic and comparision operations &
+	 * also to be consistent with trbe_write &
+	 * trbe_limit sibling pointers.
+	 */
+	unsigned long trbe_base;
+	/* The base programmed into the TRBE */
+	unsigned long trbe_hw_base;
+	unsigned long trbe_limit;
+	unsigned long trbe_write;
+	int nr_pages;
+	void **pages;
+	bool snapshot;
+	struct trbe_cpudata *cpudata;
+};
+
+/*
+ * TRBE erratum list
+ *
+ * The errata are defined in arm64 generic cpu_errata framework.
+ * Since the errata work arounds could be applied individually
+ * to the affected CPUs inside the TRBE driver, we need to know if
+ * a given CPU is affected by the erratum. Unlike the other erratum
+ * work arounds, TRBE driver needs to check multiple times during
+ * a trace session. Thus we need a quicker access to per-CPU
+ * errata and not issue costly this_cpu_has_cap() everytime.
+ * We keep a set of the affected errata in trbe_cpudata, per TRBE.
+ *
+ * We rely on the corresponding cpucaps to be defined for a given
+ * TRBE erratum. We map the given cpucap into a TRBE internal number
+ * to make the tracking of the errata lean.
+ *
+ * This helps in :
+ *   - Not duplicating the detection logic
+ *   - Streamlined detection of erratum across the system
+ */
+#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE	0
+#define TRBE_WORKAROUND_WRITE_OUT_OF_RANGE	1
+#define TRBE_NEEDS_DRAIN_AFTER_DISABLE		2
+#define TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE	3
+#define TRBE_IS_BROKEN				4
+
+static int trbe_errata_cpucaps[] = {
+	[TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
+	[TRBE_WORKAROUND_WRITE_OUT_OF_RANGE] = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE,
+	[TRBE_NEEDS_DRAIN_AFTER_DISABLE] = ARM64_WORKAROUND_2064142,
+	[TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE] = ARM64_WORKAROUND_2038923,
+	[TRBE_IS_BROKEN] = ARM64_WORKAROUND_1902691,
+	-1,		/* Sentinel, must be the last entry */
+};
+
+/* The total number of listed errata in trbe_errata_cpucaps */
+#define TRBE_ERRATA_MAX			(ARRAY_SIZE(trbe_errata_cpucaps) - 1)
+
+/*
+ * Safe limit for the number of bytes that may be overwritten
+ * when ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE is triggered.
+ */
+#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES	256
+
+/*
+ * struct trbe_cpudata: TRBE instance specific data
+ * @trbe_flag		- TRBE dirty/access flag support
+ * @trbe_hw_align	- Actual TRBE alignment required for TRBPTR_EL1.
+ * @trbe_align		- Software alignment used for the TRBPTR_EL1.
+ * @cpu			- CPU this TRBE belongs to.
+ * @mode		- Mode of current operation. (perf/disabled)
+ * @drvdata		- TRBE specific drvdata
+ * @errata		- Bit map for the errata on this TRBE.
+ */
+struct trbe_cpudata {
+	bool trbe_flag;
+	u64 trbe_hw_align;
+	u64 trbe_align;
+	int cpu;
+	enum cs_mode mode;
+	struct trbe_buf *buf;
+	struct trbe_drvdata *drvdata;
+	DECLARE_BITMAP(errata, TRBE_ERRATA_MAX);
+};
+
+struct trbe_drvdata {
+	struct trbe_cpudata __percpu *cpudata;
+	struct perf_output_handle * __percpu *handle;
+	struct hlist_node hotplug_node;
+	int irq;
+	cpumask_t supported_cpus;
+	enum cpuhp_state trbe_online;
+	struct platform_device *pdev;
+};
+
+static void trbe_check_errata(struct trbe_cpudata *cpudata)
+{
+	int i;
+
+	for (i = 0; i < TRBE_ERRATA_MAX; i++) {
+		int cap = trbe_errata_cpucaps[i];
+
+		if (WARN_ON_ONCE(cap < 0))
+			return;
+		if (this_cpu_has_cap(cap))
+			set_bit(i, cpudata->errata);
+	}
+}
+
+static inline bool trbe_has_erratum(struct trbe_cpudata *cpudata, int i)
+{
+	return (i < TRBE_ERRATA_MAX) && test_bit(i, cpudata->errata);
+}
+
+static inline bool trbe_may_overwrite_in_fill_mode(struct trbe_cpudata *cpudata)
+{
+	return trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE);
+}
+
+static inline bool trbe_may_write_out_of_range(struct trbe_cpudata *cpudata)
+{
+	return trbe_has_erratum(cpudata, TRBE_WORKAROUND_WRITE_OUT_OF_RANGE);
+}
+
+static inline bool trbe_needs_drain_after_disable(struct trbe_cpudata *cpudata)
+{
+	/*
+	 * Errata affected TRBE implementation will need TSB CSYNC and
+	 * DSB in order to prevent subsequent writes into certain TRBE
+	 * system registers from being ignored and not effected.
+	 */
+	return trbe_has_erratum(cpudata, TRBE_NEEDS_DRAIN_AFTER_DISABLE);
+}
+
+static inline bool trbe_needs_ctxt_sync_after_enable(struct trbe_cpudata *cpudata)
+{
+	/*
+	 * Errata affected TRBE implementation will need an additional
+	 * context synchronization in order to prevent an inconsistent
+	 * TRBE prohibited region view on the CPU which could possibly
+	 * corrupt the TRBE buffer or the TRBE state.
+	 */
+	return trbe_has_erratum(cpudata, TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE);
+}
+
+static inline bool trbe_is_broken(struct trbe_cpudata *cpudata)
+{
+	return trbe_has_erratum(cpudata, TRBE_IS_BROKEN);
+}
+
+static int trbe_alloc_node(struct perf_event *event)
+{
+	if (event->cpu == -1)
+		return NUMA_NO_NODE;
+	return cpu_to_node(event->cpu);
+}
+
+static inline void trbe_drain_buffer(void)
+{
+	tsb_csync();
+	dsb(nsh);
+}
+
+static inline void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr)
+{
+	/*
+	 * Enable the TRBE without clearing LIMITPTR which
+	 * might be required for fetching the buffer limits.
+	 */
+	trblimitr |= TRBLIMITR_EL1_E;
+	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
+
+	/* Synchronize the TRBE enable event */
+	isb();
+
+	if (trbe_needs_ctxt_sync_after_enable(cpudata))
+		isb();
+}
+
+static inline void set_trbe_disabled(struct trbe_cpudata *cpudata)
+{
+	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
+
+	/*
+	 * Disable the TRBE without clearing LIMITPTR which
+	 * might be required for fetching the buffer limits.
+	 */
+	trblimitr &= ~TRBLIMITR_EL1_E;
+	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
+
+	if (trbe_needs_drain_after_disable(cpudata))
+		trbe_drain_buffer();
+	isb();
+}
+
+static void trbe_drain_and_disable_local(struct trbe_cpudata *cpudata)
+{
+	trbe_drain_buffer();
+	set_trbe_disabled(cpudata);
+}
+
+static void trbe_reset_local(struct trbe_cpudata *cpudata)
+{
+	trbe_drain_and_disable_local(cpudata);
+	write_sysreg_s(0, SYS_TRBLIMITR_EL1);
+	write_sysreg_s(0, SYS_TRBPTR_EL1);
+	write_sysreg_s(0, SYS_TRBBASER_EL1);
+	write_sysreg_s(0, SYS_TRBSR_EL1);
+}
+
+static void trbe_report_wrap_event(struct perf_output_handle *handle)
+{
+	/*
+	 * Mark the buffer to indicate that there was a WRAP event by
+	 * setting the COLLISION flag. This indicates to the user that
+	 * the TRBE trace collection was stopped without stopping the
+	 * ETE and thus there might be some amount of trace that was
+	 * lost between the time the WRAP was detected and the IRQ
+	 * was consumed by the CPU.
+	 *
+	 * Setting the TRUNCATED flag would move the event to STOPPED
+	 * state unnecessarily, even when there is space left in the
+	 * ring buffer. Using the COLLISION flag doesn't have this side
+	 * effect. We only set TRUNCATED flag when there is no space
+	 * left in the ring buffer.
+	 */
+	perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
+}
+
+static void trbe_stop_and_truncate_event(struct perf_output_handle *handle)
+{
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+
+	/*
+	 * We cannot proceed with the buffer collection and we
+	 * do not have any data for the current session. The
+	 * etm_perf driver expects to close out the aux_buffer
+	 * at event_stop(). So disable the TRBE here and leave
+	 * the update_buffer() to return a 0 size.
+	 */
+	trbe_drain_and_disable_local(buf->cpudata);
+	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	perf_aux_output_end(handle, 0);
+	*this_cpu_ptr(buf->cpudata->drvdata->handle) = NULL;
+}
+
+/*
+ * TRBE Buffer Management
+ *
+ * The TRBE buffer spans from the base pointer till the limit pointer. When enabled,
+ * it starts writing trace data from the write pointer onward till the limit pointer.
+ * When the write pointer reaches the address just before the limit pointer, it gets
+ * wrapped around again to the base pointer. This is called a TRBE wrap event, which
+ * generates a maintenance interrupt when operated in WRAP or FILL mode. This driver
+ * uses FILL mode, where the TRBE stops the trace collection at wrap event. The IRQ
+ * handler updates the AUX buffer and re-enables the TRBE with updated WRITE and
+ * LIMIT pointers.
+ *
+ *	Wrap around with an IRQ
+ *	------ < ------ < ------- < ----- < -----
+ *	|					|
+ *	------ > ------ > ------- > ----- > -----
+ *
+ *	+---------------+-----------------------+
+ *	|		|			|
+ *	+---------------+-----------------------+
+ *	Base Pointer	Write Pointer		Limit Pointer
+ *
+ * The base and limit pointers always needs to be PAGE_SIZE aligned. But the write
+ * pointer can be aligned to the implementation defined TRBE trace buffer alignment
+ * as captured in trbe_cpudata->trbe_align.
+ *
+ *
+ *		head		tail		wakeup
+ *	+---------------------------------------+----- ~ ~ ------
+ *	|$$$$$$$|################|$$$$$$$$$$$$$$|		|
+ *	+---------------------------------------+----- ~ ~ ------
+ *	Base Pointer	Write Pointer		Limit Pointer
+ *
+ * The perf_output_handle indices (head, tail, wakeup) are monotonically increasing
+ * values which tracks all the driver writes and user reads from the perf auxiliary
+ * buffer. Generally [head..tail] is the area where the driver can write into unless
+ * the wakeup is behind the tail. Enabled TRBE buffer span needs to be adjusted and
+ * configured depending on the perf_output_handle indices, so that the driver does
+ * not override into areas in the perf auxiliary buffer which is being or yet to be
+ * consumed from the user space. The enabled TRBE buffer area is a moving subset of
+ * the allocated perf auxiliary buffer.
+ */
+
+static void __trbe_pad_buf(struct trbe_buf *buf, u64 offset, int len)
+{
+	memset((void *)buf->trbe_base + offset, ETE_IGNORE_PACKET, len);
+}
+
+static void trbe_pad_buf(struct perf_output_handle *handle, int len)
+{
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	u64 head = PERF_IDX2OFF(handle->head, buf);
+
+	__trbe_pad_buf(buf, head, len);
+	if (!buf->snapshot)
+		perf_aux_output_skip(handle, len);
+}
+
+static unsigned long trbe_snapshot_offset(struct perf_output_handle *handle)
+{
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+
+	/*
+	 * The ETE trace has alignment synchronization packets allowing
+	 * the decoder to reset in case of an overflow or corruption.
+	 * So we can use the entire buffer for the snapshot mode.
+	 */
+	return buf->nr_pages * PAGE_SIZE;
+}
+
+static u64 trbe_min_trace_buf_size(struct perf_output_handle *handle)
+{
+	u64 size = TRBE_TRACE_MIN_BUF_SIZE;
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	struct trbe_cpudata *cpudata = buf->cpudata;
+
+	/*
+	 * When the TRBE is affected by an erratum that could make it
+	 * write to the next "virtually addressed" page beyond the LIMIT.
+	 * We need to make sure there is always a PAGE after the LIMIT,
+	 * within the buffer. Thus we ensure there is at least an extra
+	 * page than normal. With this we could then adjust the LIMIT
+	 * pointer down by a PAGE later.
+	 */
+	if (trbe_may_write_out_of_range(cpudata))
+		size += PAGE_SIZE;
+	return size;
+}
+
+/*
+ * TRBE Limit Calculation
+ *
+ * The following markers are used to illustrate various TRBE buffer situations.
+ *
+ * $$$$ - Data area, unconsumed captured trace data, not to be overridden
+ * #### - Free area, enabled, trace will be written
+ * %%%% - Free area, disabled, trace will not be written
+ * ==== - Free area, padded with ETE_IGNORE_PACKET, trace will be skipped
+ */
+static unsigned long __trbe_normal_offset(struct perf_output_handle *handle)
+{
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	struct trbe_cpudata *cpudata = buf->cpudata;
+	const u64 bufsize = buf->nr_pages * PAGE_SIZE;
+	u64 limit = bufsize;
+	u64 head, tail, wakeup;
+
+	head = PERF_IDX2OFF(handle->head, buf);
+
+	/*
+	 *		head
+	 *	------->|
+	 *	|
+	 *	head	TRBE align	tail
+	 * +----|-------|---------------|-------+
+	 * |$$$$|=======|###############|$$$$$$$|
+	 * +----|-------|---------------|-------+
+	 * trbe_base				trbe_base + nr_pages
+	 *
+	 * Perf aux buffer output head position can be misaligned depending on
+	 * various factors including user space reads. In case misaligned, head
+	 * needs to be aligned before TRBE can be configured. Pad the alignment
+	 * gap with ETE_IGNORE_PACKET bytes that will be ignored by user tools
+	 * and skip this section thus advancing the head.
+	 */
+	if (!IS_ALIGNED(head, cpudata->trbe_align)) {
+		unsigned long delta = roundup(head, cpudata->trbe_align) - head;
+
+		delta = min(delta, handle->size);
+		trbe_pad_buf(handle, delta);
+		head = PERF_IDX2OFF(handle->head, buf);
+	}
+
+	/*
+	 *	head = tail (size = 0)
+	 * +----|-------------------------------+
+	 * |$$$$|$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$	|
+	 * +----|-------------------------------+
+	 * trbe_base				trbe_base + nr_pages
+	 *
+	 * Perf aux buffer does not have any space for the driver to write into.
+	 */
+	if (!handle->size)
+		return 0;
+
+	/* Compute the tail and wakeup indices now that we've aligned head */
+	tail = PERF_IDX2OFF(handle->head + handle->size, buf);
+	wakeup = PERF_IDX2OFF(handle->wakeup, buf);
+
+	/*
+	 * Lets calculate the buffer area which TRBE could write into. There
+	 * are three possible scenarios here. Limit needs to be aligned with
+	 * PAGE_SIZE per the TRBE requirement. Always avoid clobbering the
+	 * unconsumed data.
+	 *
+	 * 1) head < tail
+	 *
+	 *	head			tail
+	 * +----|-----------------------|-------+
+	 * |$$$$|#######################|$$$$$$$|
+	 * +----|-----------------------|-------+
+	 * trbe_base			limit	trbe_base + nr_pages
+	 *
+	 * TRBE could write into [head..tail] area. Unless the tail is right at
+	 * the end of the buffer, neither an wrap around nor an IRQ is expected
+	 * while being enabled.
+	 *
+	 * 2) head == tail
+	 *
+	 *	head = tail (size > 0)
+	 * +----|-------------------------------+
+	 * |%%%%|###############################|
+	 * +----|-------------------------------+
+	 * trbe_base				limit = trbe_base + nr_pages
+	 *
+	 * TRBE should just write into [head..base + nr_pages] area even though
+	 * the entire buffer is empty. Reason being, when the trace reaches the
+	 * end of the buffer, it will just wrap around with an IRQ giving an
+	 * opportunity to reconfigure the buffer.
+	 *
+	 * 3) tail < head
+	 *
+	 *	tail			head
+	 * +----|-----------------------|-------+
+	 * |%%%%|$$$$$$$$$$$$$$$$$$$$$$$|#######|
+	 * +----|-----------------------|-------+
+	 * trbe_base				limit = trbe_base + nr_pages
+	 *
+	 * TRBE should just write into [head..base + nr_pages] area even though
+	 * the [trbe_base..tail] is also empty. Reason being, when the trace
+	 * reaches the end of the buffer, it will just wrap around with an IRQ
+	 * giving an opportunity to reconfigure the buffer.
+	 */
+	if (head < tail)
+		limit = round_down(tail, PAGE_SIZE);
+
+	/*
+	 * Wakeup may be arbitrarily far into the future. If it's not in the
+	 * current generation, either we'll wrap before hitting it, or it's
+	 * in the past and has been handled already.
+	 *
+	 * If there's a wakeup before we wrap, arrange to be woken up by the
+	 * page boundary following it. Keep the tail boundary if that's lower.
+	 *
+	 *	head		wakeup	tail
+	 * +----|---------------|-------|-------+
+	 * |$$$$|###############|%%%%%%%|$$$$$$$|
+	 * +----|---------------|-------|-------+
+	 * trbe_base		limit		trbe_base + nr_pages
+	 */
+	if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
+		limit = min(limit, round_up(wakeup, PAGE_SIZE));
+
+	/*
+	 * There are two situation when this can happen i.e limit is before
+	 * the head and hence TRBE cannot be configured.
+	 *
+	 * 1) head < tail (aligned down with PAGE_SIZE) and also they are both
+	 * within the same PAGE size range.
+	 *
+	 *			PAGE_SIZE
+	 *		|----------------------|
+	 *
+	 *		limit	head	tail
+	 * +------------|------|--------|-------+
+	 * |$$$$$$$$$$$$$$$$$$$|========|$$$$$$$|
+	 * +------------|------|--------|-------+
+	 * trbe_base				trbe_base + nr_pages
+	 *
+	 * 2) head < wakeup (aligned up with PAGE_SIZE) < tail and also both
+	 * head and wakeup are within same PAGE size range.
+	 *
+	 *		PAGE_SIZE
+	 *	|----------------------|
+	 *
+	 *	limit	head	wakeup  tail
+	 * +----|------|-------|--------|-------+
+	 * |$$$$$$$$$$$|=======|========|$$$$$$$|
+	 * +----|------|-------|--------|-------+
+	 * trbe_base				trbe_base + nr_pages
+	 */
+	if (limit > head)
+		return limit;
+
+	trbe_pad_buf(handle, handle->size);
+	return 0;
+}
+
+static unsigned long trbe_normal_offset(struct perf_output_handle *handle)
+{
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	u64 limit = __trbe_normal_offset(handle);
+	u64 head = PERF_IDX2OFF(handle->head, buf);
+
+	/*
+	 * If the head is too close to the limit and we don't
+	 * have space for a meaningful run, we rather pad it
+	 * and start fresh.
+	 *
+	 * We might have to do this more than once to make sure
+	 * we have enough required space.
+	 */
+	while (limit && ((limit - head) < trbe_min_trace_buf_size(handle))) {
+		trbe_pad_buf(handle, limit - head);
+		limit = __trbe_normal_offset(handle);
+		head = PERF_IDX2OFF(handle->head, buf);
+	}
+	return limit;
+}
+
+static unsigned long compute_trbe_buffer_limit(struct perf_output_handle *handle)
+{
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	unsigned long offset;
+
+	if (buf->snapshot)
+		offset = trbe_snapshot_offset(handle);
+	else
+		offset = trbe_normal_offset(handle);
+	return buf->trbe_base + offset;
+}
+
+static void clr_trbe_status(void)
+{
+	u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
+
+	WARN_ON(is_trbe_enabled());
+	trbsr &= ~TRBSR_EL1_IRQ;
+	trbsr &= ~TRBSR_EL1_TRG;
+	trbsr &= ~TRBSR_EL1_WRAP;
+	trbsr &= ~TRBSR_EL1_EC_MASK;
+	trbsr &= ~TRBSR_EL1_BSC_MASK;
+	trbsr &= ~TRBSR_EL1_S;
+	write_sysreg_s(trbsr, SYS_TRBSR_EL1);
+}
+
+static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf)
+{
+	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
+	unsigned long addr = buf->trbe_limit;
+
+	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_EL1_LIMIT_SHIFT)));
+	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
+
+	trblimitr &= ~TRBLIMITR_EL1_nVM;
+	trblimitr &= ~TRBLIMITR_EL1_FM_MASK;
+	trblimitr &= ~TRBLIMITR_EL1_TM_MASK;
+	trblimitr &= ~TRBLIMITR_EL1_LIMIT_MASK;
+
+	/*
+	 * Fill trace buffer mode is used here while configuring the
+	 * TRBE for trace capture. In this particular mode, the trace
+	 * collection is stopped and a maintenance interrupt is raised
+	 * when the current write pointer wraps. This pause in trace
+	 * collection gives the software an opportunity to capture the
+	 * trace data in the interrupt handler, before reconfiguring
+	 * the TRBE.
+	 */
+	trblimitr |= (TRBLIMITR_EL1_FM_FILL << TRBLIMITR_EL1_FM_SHIFT) &
+		     TRBLIMITR_EL1_FM_MASK;
+
+	/*
+	 * Trigger mode is not used here while configuring the TRBE for
+	 * the trace capture. Hence just keep this in the ignore mode.
+	 */
+	trblimitr |= (TRBLIMITR_EL1_TM_IGNR << TRBLIMITR_EL1_TM_SHIFT) &
+		     TRBLIMITR_EL1_TM_MASK;
+	trblimitr |= (addr & PAGE_MASK);
+	set_trbe_enabled(buf->cpudata, trblimitr);
+}
+
+static void trbe_enable_hw(struct trbe_buf *buf)
+{
+	WARN_ON(buf->trbe_hw_base < buf->trbe_base);
+	WARN_ON(buf->trbe_write < buf->trbe_hw_base);
+	WARN_ON(buf->trbe_write >= buf->trbe_limit);
+	set_trbe_disabled(buf->cpudata);
+	clr_trbe_status();
+	set_trbe_base_pointer(buf->trbe_hw_base);
+	set_trbe_write_pointer(buf->trbe_write);
+
+	/*
+	 * Synchronize all the register updates
+	 * till now before enabling the TRBE.
+	 */
+	isb();
+	set_trbe_limit_pointer_enabled(buf);
+}
+
+static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
+						 u64 trbsr)
+{
+	int ec = get_trbe_ec(trbsr);
+	int bsc = get_trbe_bsc(trbsr);
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	struct trbe_cpudata *cpudata = buf->cpudata;
+
+	WARN_ON(is_trbe_running(trbsr));
+	if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
+		return TRBE_FAULT_ACT_FATAL;
+
+	if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
+		return TRBE_FAULT_ACT_FATAL;
+
+	/*
+	 * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
+	 * it might write data after a WRAP event in the fill mode.
+	 * Thus the check TRBPTR == TRBBASER will not be honored.
+	 */
+	if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
+	    (trbe_may_overwrite_in_fill_mode(cpudata) ||
+	     get_trbe_write_pointer() == get_trbe_base_pointer()))
+		return TRBE_FAULT_ACT_WRAP;
+
+	return TRBE_FAULT_ACT_SPURIOUS;
+}
+
+static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
+					 struct trbe_buf *buf, bool wrap)
+{
+	u64 write;
+	u64 start_off, end_off;
+	u64 size;
+	u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
+
+	/*
+	 * If the TRBE has wrapped around the write pointer has
+	 * wrapped and should be treated as limit.
+	 *
+	 * When the TRBE is affected by TRBE_WORKAROUND_WRITE_OUT_OF_RANGE,
+	 * it may write upto 64bytes beyond the "LIMIT". The driver already
+	 * keeps a valid page next to the LIMIT and we could potentially
+	 * consume the trace data that may have been collected there. But we
+	 * cannot be really sure it is available, and the TRBPTR may not
+	 * indicate the same. Also, affected cores are also affected by another
+	 * erratum which forces the PAGE_SIZE alignment on the TRBPTR, and thus
+	 * could potentially pad an entire PAGE_SIZE - 64bytes, to get those
+	 * 64bytes. Thus we ignore the potential triggering of the erratum
+	 * on WRAP and limit the data to LIMIT.
+	 */
+	if (wrap)
+		write = get_trbe_limit_pointer();
+	else
+		write = get_trbe_write_pointer();
+
+	/*
+	 * TRBE may use a different base address than the base
+	 * of the ring buffer. Thus use the beginning of the ring
+	 * buffer to compute the offsets.
+	 */
+	end_off = write - buf->trbe_base;
+	start_off = PERF_IDX2OFF(handle->head, buf);
+
+	if (WARN_ON_ONCE(end_off < start_off))
+		return 0;
+
+	size = end_off - start_off;
+	/*
+	 * If the TRBE is affected by the following erratum, we must fill
+	 * the space we skipped with IGNORE packets. And we are always
+	 * guaranteed to have at least a PAGE_SIZE space in the buffer.
+	 */
+	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
+	    !WARN_ON(size < overwrite_skip))
+		__trbe_pad_buf(buf, start_off, overwrite_skip);
+
+	return size;
+}
+
+static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
+				   struct perf_event *event, void **pages,
+				   int nr_pages, bool snapshot)
+{
+	struct trbe_buf *buf;
+	struct page **pglist;
+	int i;
+
+	/*
+	 * TRBE LIMIT and TRBE WRITE pointers must be page aligned. But with
+	 * just a single page, there would not be any room left while writing
+	 * into a partially filled TRBE buffer after the page size alignment.
+	 * Hence restrict the minimum buffer size as two pages.
+	 */
+	if (nr_pages < 2)
+		return NULL;
+
+	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, trbe_alloc_node(event));
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
+	if (!pglist) {
+		kfree(buf);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0; i < nr_pages; i++)
+		pglist[i] = virt_to_page(pages[i]);
+
+	buf->trbe_base = (unsigned long)vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!buf->trbe_base) {
+		kfree(pglist);
+		kfree(buf);
+		return ERR_PTR(-ENOMEM);
+	}
+	buf->trbe_limit = buf->trbe_base + nr_pages * PAGE_SIZE;
+	buf->trbe_write = buf->trbe_base;
+	buf->snapshot = snapshot;
+	buf->nr_pages = nr_pages;
+	buf->pages = pages;
+	kfree(pglist);
+	return buf;
+}
+
+static void arm_trbe_free_buffer(void *config)
+{
+	struct trbe_buf *buf = config;
+
+	vunmap((void *)buf->trbe_base);
+	kfree(buf);
+}
+
+static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
+					    struct perf_output_handle *handle,
+					    void *config)
+{
+	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
+	struct trbe_buf *buf = config;
+	enum trbe_fault_action act;
+	unsigned long size, status;
+	unsigned long flags;
+	bool wrap = false;
+
+	WARN_ON(buf->cpudata != cpudata);
+	WARN_ON(cpudata->cpu != smp_processor_id());
+	WARN_ON(cpudata->drvdata != drvdata);
+	if (cpudata->mode != CS_MODE_PERF)
+		return 0;
+
+	/*
+	 * We are about to disable the TRBE. And this could in turn
+	 * fill up the buffer triggering, an IRQ. This could be consumed
+	 * by the PE asynchronously, causing a race here against
+	 * the IRQ handler in closing out the handle. So, let us
+	 * make sure the IRQ can't trigger while we are collecting
+	 * the buffer. We also make sure that a WRAP event is handled
+	 * accordingly.
+	 */
+	local_irq_save(flags);
+
+	/*
+	 * If the TRBE was disabled due to lack of space in the AUX buffer or a
+	 * spurious fault, the driver leaves it disabled, truncating the buffer.
+	 * Since the etm_perf driver expects to close out the AUX buffer, the
+	 * driver skips it. Thus, just pass in 0 size here to indicate that the
+	 * buffer was truncated.
+	 */
+	if (!is_trbe_enabled()) {
+		size = 0;
+		goto done;
+	}
+	/*
+	 * perf handle structure needs to be shared with the TRBE IRQ handler for
+	 * capturing trace data and restarting the handle. There is a probability
+	 * of an undefined reference based crash when etm event is being stopped
+	 * while a TRBE IRQ also getting processed. This happens due the release
+	 * of perf handle via perf_aux_output_end() in etm_event_stop(). Stopping
+	 * the TRBE here will ensure that no IRQ could be generated when the perf
+	 * handle gets freed in etm_event_stop().
+	 */
+	trbe_drain_and_disable_local(cpudata);
+
+	/* Check if there is a pending interrupt and handle it here */
+	status = read_sysreg_s(SYS_TRBSR_EL1);
+	if (is_trbe_irq(status)) {
+
+		/*
+		 * Now that we are handling the IRQ here, clear the IRQ
+		 * from the status, to let the irq handler know that it
+		 * is taken care of.
+		 */
+		clr_trbe_irq();
+		isb();
+
+		act = trbe_get_fault_act(handle, status);
+		/*
+		 * If this was not due to a WRAP event, we have some
+		 * errors and as such buffer is empty.
+		 */
+		if (act != TRBE_FAULT_ACT_WRAP) {
+			size = 0;
+			goto done;
+		}
+
+		trbe_report_wrap_event(handle);
+		wrap = true;
+	}
+
+	size = trbe_get_trace_size(handle, buf, wrap);
+
+done:
+	local_irq_restore(flags);
+
+	if (buf->snapshot)
+		handle->head += size;
+	return size;
+}
+
+
+static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
+{
+	/*
+	 * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
+	 * line size from the "TRBBASER_EL1" in the event of a "FILL".
+	 * Thus, we could loose some amount of the trace at the base.
+	 *
+	 * Before Fix:
+	 *
+	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
+	 *  |                   \/                       /
+	 *   -------------------------------------------------------------
+	 *  |   Pg0      |   Pg1       |           |          |  PgN     |
+	 *   -------------------------------------------------------------
+	 *
+	 * In the normal course of action, we would set the TRBBASER to the
+	 * beginning of the ring-buffer (normal-BASE). But with the erratum,
+	 * the TRBE could overwrite the contents at the "normal-BASE", after
+	 * hitting the "normal-LIMIT", since it doesn't stop as expected. And
+	 * this is wrong. This could result in overwriting trace collected in
+	 * one of the previous runs, being consumed by the user. So we must
+	 * always make sure that the TRBBASER is within the region
+	 * [head, head+size]. Note that TRBBASER must be PAGE aligned,
+	 *
+	 *  After moving the BASE:
+	 *
+	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
+	 *  |                   \/                       /
+	 *   -------------------------------------------------------------
+	 *  |         |          |xyzdef.     |..   tuvw|                |
+	 *   -------------------------------------------------------------
+	 *                      /
+	 *              New-BASER
+	 *
+	 * Also, we would set the TRBPTR to head (after adjusting for
+	 * alignment) at normal-PTR. This would mean that the last few bytes
+	 * of the trace (say, "xyz") might overwrite the first few bytes of
+	 * trace written ("abc"). More importantly they will appear in what
+	 * userspace sees as the beginning of the trace, which is wrong. We may
+	 * not always have space to move the latest trace "xyz" to the correct
+	 * order as it must appear beyond the LIMIT. (i.e, [head..head+size]).
+	 * Thus it is easier to ignore those bytes than to complicate the
+	 * driver to move it, assuming that the erratum was triggered and
+	 * doing additional checks to see if there is indeed allowed space at
+	 * TRBLIMITR.LIMIT.
+	 *
+	 *  Thus the full workaround will move the BASE and the PTR and would
+	 *  look like (after padding at the skipped bytes at the end of
+	 *  session) :
+	 *
+	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
+	 *  |                   \/                       /
+	 *   -------------------------------------------------------------
+	 *  |         |          |///abc..     |..  rst|                |
+	 *   -------------------------------------------------------------
+	 *                      /    |
+	 *              New-BASER    New-TRBPTR
+	 *
+	 * To summarize, with the work around:
+	 *
+	 *  - We always align the offset for the next session to PAGE_SIZE
+	 *    (This is to ensure we can program the TRBBASER to this offset
+	 *    within the region [head...head+size]).
+	 *
+	 *  - At TRBE enable:
+	 *     - Set the TRBBASER to the page aligned offset of the current
+	 *       proposed write offset. (which is guaranteed to be aligned
+	 *       as above)
+	 *     - Move the TRBPTR to skip first 256bytes (that might be
+	 *       overwritten with the erratum). This ensures that the trace
+	 *       generated in the session is not re-written.
+	 *
+	 *  - At trace collection:
+	 *     - Pad the 256bytes skipped above again with IGNORE packets.
+	 */
+	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
+		if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
+			return -EINVAL;
+		buf->trbe_hw_base = buf->trbe_write;
+		buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
+	}
+
+	/*
+	 * TRBE_WORKAROUND_WRITE_OUT_OF_RANGE could cause the TRBE to write to
+	 * the next page after the TRBLIMITR.LIMIT. For perf, the "next page"
+	 * may be:
+	 *     - The page beyond the ring buffer. This could mean, TRBE could
+	 *       corrupt another entity (kernel / user)
+	 *     - A portion of the "ring buffer" consumed by the userspace.
+	 *       i.e, a page outisde [head, head + size].
+	 *
+	 * We work around this by:
+	 *     - Making sure that we have at least an extra space of PAGE left
+	 *       in the ring buffer [head, head + size], than we normally do
+	 *       without the erratum. See trbe_min_trace_buf_size().
+	 *
+	 *     - Adjust the TRBLIMITR.LIMIT to leave the extra PAGE outside
+	 *       the TRBE's range (i.e [TRBBASER, TRBLIMITR.LIMI] ).
+	 */
+	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_WRITE_OUT_OF_RANGE)) {
+		s64 space = buf->trbe_limit - buf->trbe_write;
+		/*
+		 * We must have more than a PAGE_SIZE worth space in the proposed
+		 * range for the TRBE.
+		 */
+		if (WARN_ON(space <= PAGE_SIZE ||
+			    !IS_ALIGNED(buf->trbe_limit, PAGE_SIZE)))
+			return -EINVAL;
+		buf->trbe_limit -= PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static int __arm_trbe_enable(struct trbe_buf *buf,
+			     struct perf_output_handle *handle)
+{
+	int ret = 0;
+
+	perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
+	buf->trbe_limit = compute_trbe_buffer_limit(handle);
+	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
+	if (buf->trbe_limit == buf->trbe_base) {
+		ret = -ENOSPC;
+		goto err;
+	}
+	/* Set the base of the TRBE to the buffer base */
+	buf->trbe_hw_base = buf->trbe_base;
+
+	ret = trbe_apply_work_around_before_enable(buf);
+	if (ret)
+		goto err;
+
+	*this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
+	trbe_enable_hw(buf);
+	return 0;
+err:
+	trbe_stop_and_truncate_event(handle);
+	return ret;
+}
+
+static int arm_trbe_enable(struct coresight_device *csdev, enum cs_mode mode,
+			   void *data)
+{
+	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
+	struct perf_output_handle *handle = data;
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+
+	WARN_ON(cpudata->cpu != smp_processor_id());
+	WARN_ON(cpudata->drvdata != drvdata);
+	if (mode != CS_MODE_PERF)
+		return -EINVAL;
+
+	cpudata->buf = buf;
+	cpudata->mode = mode;
+	buf->cpudata = cpudata;
+
+	return __arm_trbe_enable(buf, handle);
+}
+
+static int arm_trbe_disable(struct coresight_device *csdev)
+{
+	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
+	struct trbe_buf *buf = cpudata->buf;
+
+	WARN_ON(buf->cpudata != cpudata);
+	WARN_ON(cpudata->cpu != smp_processor_id());
+	WARN_ON(cpudata->drvdata != drvdata);
+	if (cpudata->mode != CS_MODE_PERF)
+		return -EINVAL;
+
+	trbe_drain_and_disable_local(cpudata);
+	buf->cpudata = NULL;
+	cpudata->buf = NULL;
+	cpudata->mode = CS_MODE_DISABLED;
+	return 0;
+}
+
+static void trbe_handle_spurious(struct perf_output_handle *handle)
+{
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
+
+	/*
+	 * If the IRQ was spurious, simply re-enable the TRBE
+	 * back without modifying the buffer parameters to
+	 * retain the trace collected so far.
+	 */
+	set_trbe_enabled(buf->cpudata, trblimitr);
+}
+
+static int trbe_handle_overflow(struct perf_output_handle *handle)
+{
+	struct perf_event *event = handle->event;
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	unsigned long size;
+	struct etm_event_data *event_data;
+
+	size = trbe_get_trace_size(handle, buf, true);
+	if (buf->snapshot)
+		handle->head += size;
+
+	trbe_report_wrap_event(handle);
+	perf_aux_output_end(handle, size);
+	event_data = perf_aux_output_begin(handle, event);
+	if (!event_data) {
+		/*
+		 * We are unable to restart the trace collection,
+		 * thus leave the TRBE disabled. The etm-perf driver
+		 * is able to detect this with a disconnected handle
+		 * (handle->event = NULL).
+		 */
+		trbe_drain_and_disable_local(buf->cpudata);
+		*this_cpu_ptr(buf->cpudata->drvdata->handle) = NULL;
+		return -EINVAL;
+	}
+
+	return __arm_trbe_enable(buf, handle);
+}
+
+static bool is_perf_trbe(struct perf_output_handle *handle)
+{
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	struct trbe_cpudata *cpudata = buf->cpudata;
+	struct trbe_drvdata *drvdata = cpudata->drvdata;
+	int cpu = smp_processor_id();
+
+	WARN_ON(buf->trbe_hw_base != get_trbe_base_pointer());
+	WARN_ON(buf->trbe_limit != get_trbe_limit_pointer());
+
+	if (cpudata->mode != CS_MODE_PERF)
+		return false;
+
+	if (cpudata->cpu != cpu)
+		return false;
+
+	if (!cpumask_test_cpu(cpu, &drvdata->supported_cpus))
+		return false;
+
+	return true;
+}
+
+static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
+{
+	struct perf_output_handle **handle_ptr = dev;
+	struct perf_output_handle *handle = *handle_ptr;
+	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	enum trbe_fault_action act;
+	u64 status;
+	bool truncated = false;
+	u64 trfcr;
+
+	/* Reads to TRBSR_EL1 is fine when TRBE is active */
+	status = read_sysreg_s(SYS_TRBSR_EL1);
+	/*
+	 * If the pending IRQ was handled by update_buffer callback
+	 * we have nothing to do here.
+	 */
+	if (!is_trbe_irq(status))
+		return IRQ_NONE;
+
+	/* Prohibit the CPU from tracing before we disable the TRBE */
+	trfcr = cpu_prohibit_trace();
+	/*
+	 * Ensure the trace is visible to the CPUs and
+	 * any external aborts have been resolved.
+	 */
+	trbe_drain_and_disable_local(buf->cpudata);
+	clr_trbe_irq();
+	isb();
+
+	if (WARN_ON_ONCE(!handle) || !perf_get_aux(handle))
+		return IRQ_NONE;
+
+	if (!is_perf_trbe(handle))
+		return IRQ_NONE;
+
+	act = trbe_get_fault_act(handle, status);
+	switch (act) {
+	case TRBE_FAULT_ACT_WRAP:
+		truncated = !!trbe_handle_overflow(handle);
+		break;
+	case TRBE_FAULT_ACT_SPURIOUS:
+		trbe_handle_spurious(handle);
+		break;
+	case TRBE_FAULT_ACT_FATAL:
+		trbe_stop_and_truncate_event(handle);
+		truncated = true;
+		break;
+	}
+
+	/*
+	 * If the buffer was truncated, ensure perf callbacks
+	 * have completed, which will disable the event.
+	 *
+	 * Otherwise, restore the trace filter controls to
+	 * allow the tracing.
+	 */
+	if (truncated)
+		irq_work_run();
+	else
+		write_trfcr(trfcr);
+
+	return IRQ_HANDLED;
+}
+
+static const struct coresight_ops_sink arm_trbe_sink_ops = {
+	.enable		= arm_trbe_enable,
+	.disable	= arm_trbe_disable,
+	.alloc_buffer	= arm_trbe_alloc_buffer,
+	.free_buffer	= arm_trbe_free_buffer,
+	.update_buffer	= arm_trbe_update_buffer,
+};
+
+static const struct coresight_ops arm_trbe_cs_ops = {
+	.sink_ops	= &arm_trbe_sink_ops,
+};
+
+static ssize_t align_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%llx\n", cpudata->trbe_hw_align);
+}
+static DEVICE_ATTR_RO(align);
+
+static ssize_t flag_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", cpudata->trbe_flag);
+}
+static DEVICE_ATTR_RO(flag);
+
+static struct attribute *arm_trbe_attrs[] = {
+	&dev_attr_align.attr,
+	&dev_attr_flag.attr,
+	NULL,
+};
+
+static const struct attribute_group arm_trbe_group = {
+	.attrs = arm_trbe_attrs,
+};
+
+static const struct attribute_group *arm_trbe_groups[] = {
+	&arm_trbe_group,
+	NULL,
+};
+
+static void arm_trbe_enable_cpu(void *info)
+{
+	struct trbe_drvdata *drvdata = info;
+	struct trbe_cpudata *cpudata = this_cpu_ptr(drvdata->cpudata);
+
+	trbe_reset_local(cpudata);
+	enable_percpu_irq(drvdata->irq, IRQ_TYPE_NONE);
+}
+
+static void arm_trbe_disable_cpu(void *info)
+{
+	struct trbe_drvdata *drvdata = info;
+	struct trbe_cpudata *cpudata = this_cpu_ptr(drvdata->cpudata);
+
+	disable_percpu_irq(drvdata->irq);
+	trbe_reset_local(cpudata);
+}
+
+
+static void arm_trbe_register_coresight_cpu(struct trbe_drvdata *drvdata, int cpu)
+{
+	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
+	struct coresight_device *trbe_csdev = coresight_get_percpu_sink(cpu);
+	struct coresight_desc desc = { 0 };
+	struct device *dev;
+
+	if (WARN_ON(trbe_csdev))
+		return;
+
+	/* If the TRBE was not probed on the CPU, we shouldn't be here */
+	if (WARN_ON(!cpudata->drvdata))
+		return;
+
+	dev = &cpudata->drvdata->pdev->dev;
+	desc.name = devm_kasprintf(dev, GFP_KERNEL, "trbe%d", cpu);
+	if (!desc.name)
+		goto cpu_clear;
+
+	desc.pdata = coresight_get_platform_data(dev);
+	if (IS_ERR(desc.pdata))
+		goto cpu_clear;
+
+	desc.type = CORESIGHT_DEV_TYPE_SINK;
+	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM;
+	desc.ops = &arm_trbe_cs_ops;
+	desc.groups = arm_trbe_groups;
+	desc.dev = dev;
+	trbe_csdev = coresight_register(&desc);
+	if (IS_ERR(trbe_csdev))
+		goto cpu_clear;
+
+	dev_set_drvdata(&trbe_csdev->dev, cpudata);
+	coresight_set_percpu_sink(cpu, trbe_csdev);
+	return;
+cpu_clear:
+	cpumask_clear_cpu(cpu, &drvdata->supported_cpus);
+}
+
+/*
+ * Must be called with preemption disabled, for trbe_check_errata().
+ */
+static void arm_trbe_probe_cpu(void *info)
+{
+	struct trbe_drvdata *drvdata = info;
+	int cpu = smp_processor_id();
+	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
+	u64 trbidr;
+
+	if (WARN_ON(!cpudata))
+		goto cpu_clear;
+
+	if (!is_trbe_available()) {
+		pr_err("TRBE is not implemented on cpu %d\n", cpu);
+		goto cpu_clear;
+	}
+
+	trbidr = read_sysreg_s(SYS_TRBIDR_EL1);
+	if (!is_trbe_programmable(trbidr)) {
+		pr_err("TRBE is owned in higher exception level on cpu %d\n", cpu);
+		goto cpu_clear;
+	}
+
+	cpudata->trbe_hw_align = 1ULL << get_trbe_address_align(trbidr);
+	if (cpudata->trbe_hw_align > SZ_2K) {
+		pr_err("Unsupported alignment on cpu %d\n", cpu);
+		goto cpu_clear;
+	}
+
+	/*
+	 * Run the TRBE erratum checks, now that we know
+	 * this instance is about to be registered.
+	 */
+	trbe_check_errata(cpudata);
+
+	if (trbe_is_broken(cpudata)) {
+		pr_err("Disabling TRBE on cpu%d due to erratum\n", cpu);
+		goto cpu_clear;
+	}
+
+	/*
+	 * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
+	 * we must always program the TBRPTR_EL1, 256bytes from a page
+	 * boundary, with TRBBASER_EL1 set to the page, to prevent
+	 * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
+	 *
+	 * Thus make sure we always align our write pointer to a PAGE_SIZE,
+	 * which also guarantees that we have at least a PAGE_SIZE space in
+	 * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
+	 * the required bytes at the base.
+	 */
+	if (trbe_may_overwrite_in_fill_mode(cpudata))
+		cpudata->trbe_align = PAGE_SIZE;
+	else
+		cpudata->trbe_align = cpudata->trbe_hw_align;
+
+	cpudata->trbe_flag = get_trbe_flag_update(trbidr);
+	cpudata->cpu = cpu;
+	cpudata->drvdata = drvdata;
+	return;
+cpu_clear:
+	cpumask_clear_cpu(cpu, &drvdata->supported_cpus);
+}
+
+static void arm_trbe_remove_coresight_cpu(struct trbe_drvdata *drvdata, int cpu)
+{
+	struct coresight_device *trbe_csdev = coresight_get_percpu_sink(cpu);
+
+	if (trbe_csdev) {
+		coresight_unregister(trbe_csdev);
+		coresight_set_percpu_sink(cpu, NULL);
+	}
+}
+
+static int arm_trbe_probe_coresight(struct trbe_drvdata *drvdata)
+{
+	int cpu;
+
+	drvdata->cpudata = alloc_percpu(typeof(*drvdata->cpudata));
+	if (!drvdata->cpudata)
+		return -ENOMEM;
+
+	for_each_cpu(cpu, &drvdata->supported_cpus) {
+		/* If we fail to probe the CPU, let us defer it to hotplug callbacks */
+		if (smp_call_function_single(cpu, arm_trbe_probe_cpu, drvdata, 1))
+			continue;
+		if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
+			arm_trbe_register_coresight_cpu(drvdata, cpu);
+		if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
+			smp_call_function_single(cpu, arm_trbe_enable_cpu, drvdata, 1);
+	}
+	return 0;
+}
+
+static int arm_trbe_remove_coresight(struct trbe_drvdata *drvdata)
+{
+	int cpu;
+
+	for_each_cpu(cpu, &drvdata->supported_cpus) {
+		smp_call_function_single(cpu, arm_trbe_disable_cpu, drvdata, 1);
+		arm_trbe_remove_coresight_cpu(drvdata, cpu);
+	}
+	free_percpu(drvdata->cpudata);
+	return 0;
+}
+
+static void arm_trbe_probe_hotplugged_cpu(struct trbe_drvdata *drvdata)
+{
+	preempt_disable();
+	arm_trbe_probe_cpu(drvdata);
+	preempt_enable();
+}
+
+static int arm_trbe_cpu_startup(unsigned int cpu, struct hlist_node *node)
+{
+	struct trbe_drvdata *drvdata = hlist_entry_safe(node, struct trbe_drvdata, hotplug_node);
+
+	if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) {
+
+		/*
+		 * If this CPU was not probed for TRBE,
+		 * initialize it now.
+		 */
+		if (!coresight_get_percpu_sink(cpu)) {
+			arm_trbe_probe_hotplugged_cpu(drvdata);
+			if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
+				arm_trbe_register_coresight_cpu(drvdata, cpu);
+			if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
+				arm_trbe_enable_cpu(drvdata);
+		} else {
+			arm_trbe_enable_cpu(drvdata);
+		}
+	}
+	return 0;
+}
+
+static int arm_trbe_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+	struct trbe_drvdata *drvdata = hlist_entry_safe(node, struct trbe_drvdata, hotplug_node);
+
+	if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
+		arm_trbe_disable_cpu(drvdata);
+	return 0;
+}
+
+static int arm_trbe_probe_cpuhp(struct trbe_drvdata *drvdata)
+{
+	enum cpuhp_state trbe_online;
+	int ret;
+
+	trbe_online = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
+					      arm_trbe_cpu_startup, arm_trbe_cpu_teardown);
+	if (trbe_online < 0)
+		return trbe_online;
+
+	ret = cpuhp_state_add_instance(trbe_online, &drvdata->hotplug_node);
+	if (ret) {
+		cpuhp_remove_multi_state(trbe_online);
+		return ret;
+	}
+	drvdata->trbe_online = trbe_online;
+	return 0;
+}
+
+static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata)
+{
+	cpuhp_state_remove_instance(drvdata->trbe_online, &drvdata->hotplug_node);
+	cpuhp_remove_multi_state(drvdata->trbe_online);
+}
+
+static int arm_trbe_probe_irq(struct platform_device *pdev,
+			      struct trbe_drvdata *drvdata)
+{
+	int ret;
+
+	drvdata->irq = platform_get_irq(pdev, 0);
+	if (drvdata->irq < 0) {
+		pr_err("IRQ not found for the platform device\n");
+		return drvdata->irq;
+	}
+
+	if (!irq_is_percpu(drvdata->irq)) {
+		pr_err("IRQ is not a PPI\n");
+		return -EINVAL;
+	}
+
+	if (irq_get_percpu_devid_partition(drvdata->irq, &drvdata->supported_cpus))
+		return -EINVAL;
+
+	drvdata->handle = alloc_percpu(struct perf_output_handle *);
+	if (!drvdata->handle)
+		return -ENOMEM;
+
+	ret = request_percpu_irq(drvdata->irq, arm_trbe_irq_handler, DRVNAME, drvdata->handle);
+	if (ret) {
+		free_percpu(drvdata->handle);
+		return ret;
+	}
+	return 0;
+}
+
+static void arm_trbe_remove_irq(struct trbe_drvdata *drvdata)
+{
+	free_percpu_irq(drvdata->irq, drvdata->handle);
+	free_percpu(drvdata->handle);
+}
+
+static int arm_trbe_device_probe(struct platform_device *pdev)
+{
+	struct trbe_drvdata *drvdata;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	/* Trace capture is not possible with kernel page table isolation */
+	if (arm64_kernel_unmapped_at_el0()) {
+		pr_err("TRBE wouldn't work if kernel gets unmapped at EL0\n");
+		return -EOPNOTSUPP;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, drvdata);
+	drvdata->pdev = pdev;
+	ret = arm_trbe_probe_irq(pdev, drvdata);
+	if (ret)
+		return ret;
+
+	ret = arm_trbe_probe_coresight(drvdata);
+	if (ret)
+		goto probe_failed;
+
+	ret = arm_trbe_probe_cpuhp(drvdata);
+	if (ret)
+		goto cpuhp_failed;
+
+	return 0;
+cpuhp_failed:
+	arm_trbe_remove_coresight(drvdata);
+probe_failed:
+	arm_trbe_remove_irq(drvdata);
+	return ret;
+}
+
+static int arm_trbe_device_remove(struct platform_device *pdev)
+{
+	struct trbe_drvdata *drvdata = platform_get_drvdata(pdev);
+
+	arm_trbe_remove_cpuhp(drvdata);
+	arm_trbe_remove_coresight(drvdata);
+	arm_trbe_remove_irq(drvdata);
+	return 0;
+}
+
+static const struct of_device_id arm_trbe_of_match[] = {
+	{ .compatible = "arm,trace-buffer-extension"},
+	{},
+};
+MODULE_DEVICE_TABLE(of, arm_trbe_of_match);
+
+static struct platform_driver arm_trbe_driver = {
+	.driver	= {
+		.name = DRVNAME,
+		.of_match_table = of_match_ptr(arm_trbe_of_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe	= arm_trbe_device_probe,
+	.remove	= arm_trbe_device_remove,
+};
+
+static int __init arm_trbe_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&arm_trbe_driver);
+	if (!ret)
+		return 0;
+
+	pr_err("Error registering %s platform driver\n", DRVNAME);
+	return ret;
+}
+
+static void __exit arm_trbe_exit(void)
+{
+	platform_driver_unregister(&arm_trbe_driver);
+}
+module_init(arm_trbe_init);
+module_exit(arm_trbe_exit);
+
+MODULE_AUTHOR("Anshuman Khandual <anshuman.khandual@arm.com>");
+MODULE_DESCRIPTION("Arm Trace Buffer Extension (TRBE) driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/coresight-trbe.h b/drivers/hwtracing/coresight/coresight-trbe.h
new file mode 100644
index 0000000000..e915e749be
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-trbe.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This contains all required hardware related helper functions for
+ * Trace Buffer Extension (TRBE) driver in the coresight framework.
+ *
+ * Copyright (C) 2020 ARM Ltd.
+ *
+ * Author: Anshuman Khandual <anshuman.khandual@arm.com>
+ */
+#include <linux/coresight.h>
+#include <linux/device.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+
+#include "coresight-etm-perf.h"
+
+static inline bool is_trbe_available(void)
+{
+	u64 aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+	unsigned int trbe = cpuid_feature_extract_unsigned_field(aa64dfr0,
+								 ID_AA64DFR0_EL1_TraceBuffer_SHIFT);
+
+	return trbe >= ID_AA64DFR0_EL1_TraceBuffer_IMP;
+}
+
+static inline bool is_trbe_enabled(void)
+{
+	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
+
+	return trblimitr & TRBLIMITR_EL1_E;
+}
+
+#define TRBE_EC_OTHERS		0
+#define TRBE_EC_STAGE1_ABORT	36
+#define TRBE_EC_STAGE2_ABORT	37
+
+static inline int get_trbe_ec(u64 trbsr)
+{
+	return (trbsr & TRBSR_EL1_EC_MASK) >> TRBSR_EL1_EC_SHIFT;
+}
+
+#define TRBE_BSC_NOT_STOPPED 0
+#define TRBE_BSC_FILLED      1
+#define TRBE_BSC_TRIGGERED   2
+
+static inline int get_trbe_bsc(u64 trbsr)
+{
+	return (trbsr & TRBSR_EL1_BSC_MASK) >> TRBSR_EL1_BSC_SHIFT;
+}
+
+static inline void clr_trbe_irq(void)
+{
+	u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
+
+	trbsr &= ~TRBSR_EL1_IRQ;
+	write_sysreg_s(trbsr, SYS_TRBSR_EL1);
+}
+
+static inline bool is_trbe_irq(u64 trbsr)
+{
+	return trbsr & TRBSR_EL1_IRQ;
+}
+
+static inline bool is_trbe_trg(u64 trbsr)
+{
+	return trbsr & TRBSR_EL1_TRG;
+}
+
+static inline bool is_trbe_wrap(u64 trbsr)
+{
+	return trbsr & TRBSR_EL1_WRAP;
+}
+
+static inline bool is_trbe_abort(u64 trbsr)
+{
+	return trbsr & TRBSR_EL1_EA;
+}
+
+static inline bool is_trbe_running(u64 trbsr)
+{
+	return !(trbsr & TRBSR_EL1_S);
+}
+
+static inline bool get_trbe_flag_update(u64 trbidr)
+{
+	return trbidr & TRBIDR_EL1_F;
+}
+
+static inline bool is_trbe_programmable(u64 trbidr)
+{
+	return !(trbidr & TRBIDR_EL1_P);
+}
+
+static inline int get_trbe_address_align(u64 trbidr)
+{
+	return (trbidr & TRBIDR_EL1_Align_MASK) >> TRBIDR_EL1_Align_SHIFT;
+}
+
+static inline unsigned long get_trbe_write_pointer(void)
+{
+	return read_sysreg_s(SYS_TRBPTR_EL1);
+}
+
+static inline void set_trbe_write_pointer(unsigned long addr)
+{
+	WARN_ON(is_trbe_enabled());
+	write_sysreg_s(addr, SYS_TRBPTR_EL1);
+}
+
+static inline unsigned long get_trbe_limit_pointer(void)
+{
+	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
+	unsigned long addr = trblimitr & TRBLIMITR_EL1_LIMIT_MASK;
+
+	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
+	return addr;
+}
+
+static inline unsigned long get_trbe_base_pointer(void)
+{
+	u64 trbbaser = read_sysreg_s(SYS_TRBBASER_EL1);
+	unsigned long addr = trbbaser & TRBBASER_EL1_BASE_MASK;
+
+	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
+	return addr;
+}
+
+static inline void set_trbe_base_pointer(unsigned long addr)
+{
+	WARN_ON(is_trbe_enabled());
+	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBBASER_EL1_BASE_SHIFT)));
+	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
+	write_sysreg_s(addr, SYS_TRBBASER_EL1);
+}
diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c
new file mode 100644
index 0000000000..6e32d31a95
--- /dev/null
+++ b/drivers/hwtracing/coresight/ultrasoc-smb.c
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Siemens System Memory Buffer driver.
+ * Copyright(c) 2022, HiSilicon Limited.
+ */
+
+#include <linux/atomic.h>
+#include <linux/acpi.h>
+#include <linux/circ_buf.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+
+#include "coresight-etm-perf.h"
+#include "coresight-priv.h"
+#include "ultrasoc-smb.h"
+
+DEFINE_CORESIGHT_DEVLIST(sink_devs, "ultra_smb");
+
+#define ULTRASOC_SMB_DSM_UUID	"82ae1283-7f6a-4cbe-aa06-53e8fb24db18"
+
+static bool smb_buffer_not_empty(struct smb_drv_data *drvdata)
+{
+	u32 buf_status = readl(drvdata->base + SMB_LB_INT_STS_REG);
+
+	return FIELD_GET(SMB_LB_INT_STS_NOT_EMPTY_MSK, buf_status);
+}
+
+static void smb_update_data_size(struct smb_drv_data *drvdata)
+{
+	struct smb_data_buffer *sdb = &drvdata->sdb;
+	u32 buf_wrptr;
+
+	buf_wrptr = readl(drvdata->base + SMB_LB_WR_ADDR_REG) -
+			  sdb->buf_hw_base;
+
+	/* Buffer is full */
+	if (buf_wrptr == sdb->buf_rdptr && smb_buffer_not_empty(drvdata)) {
+		sdb->data_size = sdb->buf_size;
+		return;
+	}
+
+	/* The buffer mode is circular buffer mode */
+	sdb->data_size = CIRC_CNT(buf_wrptr, sdb->buf_rdptr,
+				  sdb->buf_size);
+}
+
+/*
+ * The read pointer adds @nbytes bytes (may round up to the beginning)
+ * after the data is read or discarded, while needing to update the
+ * available data size.
+ */
+static void smb_update_read_ptr(struct smb_drv_data *drvdata, u32 nbytes)
+{
+	struct smb_data_buffer *sdb = &drvdata->sdb;
+
+	sdb->buf_rdptr += nbytes;
+	sdb->buf_rdptr %= sdb->buf_size;
+	writel(sdb->buf_hw_base + sdb->buf_rdptr,
+	       drvdata->base + SMB_LB_RD_ADDR_REG);
+
+	sdb->data_size -= nbytes;
+}
+
+static void smb_reset_buffer(struct smb_drv_data *drvdata)
+{
+	struct smb_data_buffer *sdb = &drvdata->sdb;
+	u32 write_ptr;
+
+	/*
+	 * We must flush and discard any data left in hardware path
+	 * to avoid corrupting the next session.
+	 * Note: The write pointer will never exceed the read pointer.
+	 */
+	writel(SMB_LB_PURGE_PURGED, drvdata->base + SMB_LB_PURGE_REG);
+
+	/* Reset SMB logical buffer status flags */
+	writel(SMB_LB_INT_STS_RESET, drvdata->base + SMB_LB_INT_STS_REG);
+
+	write_ptr = readl(drvdata->base + SMB_LB_WR_ADDR_REG);
+
+	/* Do nothing, not data left in hardware path */
+	if (!write_ptr || write_ptr == sdb->buf_rdptr + sdb->buf_hw_base)
+		return;
+
+	/*
+	 * The SMB_LB_WR_ADDR_REG register is read-only,
+	 * Synchronize the read pointer to write pointer.
+	 */
+	writel(write_ptr, drvdata->base + SMB_LB_RD_ADDR_REG);
+	sdb->buf_rdptr = write_ptr - sdb->buf_hw_base;
+}
+
+static int smb_open(struct inode *inode, struct file *file)
+{
+	struct smb_drv_data *drvdata = container_of(file->private_data,
+					struct smb_drv_data, miscdev);
+	int ret = 0;
+
+	spin_lock(&drvdata->spinlock);
+
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (atomic_read(&drvdata->csdev->refcnt)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	smb_update_data_size(drvdata);
+
+	drvdata->reading = true;
+out:
+	spin_unlock(&drvdata->spinlock);
+
+	return ret;
+}
+
+static ssize_t smb_read(struct file *file, char __user *data, size_t len,
+			loff_t *ppos)
+{
+	struct smb_drv_data *drvdata = container_of(file->private_data,
+					struct smb_drv_data, miscdev);
+	struct smb_data_buffer *sdb = &drvdata->sdb;
+	struct device *dev = &drvdata->csdev->dev;
+	ssize_t to_copy = 0;
+
+	if (!len)
+		return 0;
+
+	if (!sdb->data_size)
+		return 0;
+
+	to_copy = min(sdb->data_size, len);
+
+	/* Copy parts of trace data when read pointer wrap around SMB buffer */
+	if (sdb->buf_rdptr + to_copy > sdb->buf_size)
+		to_copy = sdb->buf_size - sdb->buf_rdptr;
+
+	if (copy_to_user(data, sdb->buf_base + sdb->buf_rdptr, to_copy)) {
+		dev_dbg(dev, "Failed to copy data to user\n");
+		return -EFAULT;
+	}
+
+	*ppos += to_copy;
+	smb_update_read_ptr(drvdata, to_copy);
+	if (!sdb->data_size)
+		smb_reset_buffer(drvdata);
+
+	dev_dbg(dev, "%zu bytes copied\n", to_copy);
+	return to_copy;
+}
+
+static int smb_release(struct inode *inode, struct file *file)
+{
+	struct smb_drv_data *drvdata = container_of(file->private_data,
+					struct smb_drv_data, miscdev);
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->reading = false;
+	spin_unlock(&drvdata->spinlock);
+
+	return 0;
+}
+
+static const struct file_operations smb_fops = {
+	.owner		= THIS_MODULE,
+	.open		= smb_open,
+	.read		= smb_read,
+	.release	= smb_release,
+	.llseek		= no_llseek,
+};
+
+static ssize_t buf_size_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct smb_drv_data *drvdata = dev_get_drvdata(dev->parent);
+
+	return sysfs_emit(buf, "0x%lx\n", drvdata->sdb.buf_size);
+}
+static DEVICE_ATTR_RO(buf_size);
+
+static struct attribute *smb_sink_attrs[] = {
+	coresight_simple_reg32(read_pos, SMB_LB_RD_ADDR_REG),
+	coresight_simple_reg32(write_pos, SMB_LB_WR_ADDR_REG),
+	coresight_simple_reg32(buf_status, SMB_LB_INT_STS_REG),
+	&dev_attr_buf_size.attr,
+	NULL
+};
+
+static const struct attribute_group smb_sink_group = {
+	.attrs = smb_sink_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *smb_sink_groups[] = {
+	&smb_sink_group,
+	NULL
+};
+
+static void smb_enable_hw(struct smb_drv_data *drvdata)
+{
+	writel(SMB_GLB_EN_HW_ENABLE, drvdata->base + SMB_GLB_EN_REG);
+}
+
+static void smb_disable_hw(struct smb_drv_data *drvdata)
+{
+	writel(0x0, drvdata->base + SMB_GLB_EN_REG);
+}
+
+static void smb_enable_sysfs(struct coresight_device *csdev)
+{
+	struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	if (drvdata->mode != CS_MODE_DISABLED)
+		return;
+
+	smb_enable_hw(drvdata);
+	drvdata->mode = CS_MODE_SYSFS;
+}
+
+static int smb_enable_perf(struct coresight_device *csdev, void *data)
+{
+	struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+	pid_t pid;
+
+	if (!buf)
+		return -EINVAL;
+
+	/* Get a handle on the pid of the target process */
+	pid = buf->pid;
+
+	/* Device is already in used by other session */
+	if (drvdata->pid != -1 && drvdata->pid != pid)
+		return -EBUSY;
+
+	if (drvdata->pid == -1) {
+		smb_enable_hw(drvdata);
+		drvdata->pid = pid;
+		drvdata->mode = CS_MODE_PERF;
+	}
+
+	return 0;
+}
+
+static int smb_enable(struct coresight_device *csdev, enum cs_mode mode,
+		      void *data)
+{
+	struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int ret = 0;
+
+	spin_lock(&drvdata->spinlock);
+
+	/* Do nothing, the trace data is reading by other interface now */
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Do nothing, the SMB is already enabled as other mode */
+	if (drvdata->mode != CS_MODE_DISABLED && drvdata->mode != mode) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	switch (mode) {
+	case CS_MODE_SYSFS:
+		smb_enable_sysfs(csdev);
+		break;
+	case CS_MODE_PERF:
+		ret = smb_enable_perf(csdev, data);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		goto out;
+
+	atomic_inc(&csdev->refcnt);
+
+	dev_dbg(&csdev->dev, "Ultrasoc SMB enabled\n");
+out:
+	spin_unlock(&drvdata->spinlock);
+
+	return ret;
+}
+
+static int smb_disable(struct coresight_device *csdev)
+{
+	struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int ret = 0;
+
+	spin_lock(&drvdata->spinlock);
+
+	if (drvdata->reading) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (atomic_dec_return(&csdev->refcnt)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Complain if we (somehow) got out of sync */
+	WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED);
+
+	smb_disable_hw(drvdata);
+
+	/* Dissociate from the target process. */
+	drvdata->pid = -1;
+	drvdata->mode = CS_MODE_DISABLED;
+
+	dev_dbg(&csdev->dev, "Ultrasoc SMB disabled\n");
+out:
+	spin_unlock(&drvdata->spinlock);
+
+	return ret;
+}
+
+static void *smb_alloc_buffer(struct coresight_device *csdev,
+			      struct perf_event *event, void **pages,
+			      int nr_pages, bool overwrite)
+{
+	struct cs_buffers *buf;
+	int node;
+
+	node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+	buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+	if (!buf)
+		return NULL;
+
+	buf->snapshot = overwrite;
+	buf->nr_pages = nr_pages;
+	buf->data_pages = pages;
+	buf->pid = task_pid_nr(event->owner);
+
+	return buf;
+}
+
+static void smb_free_buffer(void *config)
+{
+	struct cs_buffers *buf = config;
+
+	kfree(buf);
+}
+
+static void smb_sync_perf_buffer(struct smb_drv_data *drvdata,
+				 struct cs_buffers *buf,
+				 unsigned long head)
+{
+	struct smb_data_buffer *sdb = &drvdata->sdb;
+	char **dst_pages = (char **)buf->data_pages;
+	unsigned long to_copy;
+	long pg_idx, pg_offset;
+
+	pg_idx = head >> PAGE_SHIFT;
+	pg_offset = head & (PAGE_SIZE - 1);
+
+	while (sdb->data_size) {
+		unsigned long pg_space = PAGE_SIZE - pg_offset;
+
+		to_copy = min(sdb->data_size, pg_space);
+
+		/* Copy parts of trace data when read pointer wrap around */
+		if (sdb->buf_rdptr + to_copy > sdb->buf_size)
+			to_copy = sdb->buf_size - sdb->buf_rdptr;
+
+		memcpy(dst_pages[pg_idx] + pg_offset,
+			      sdb->buf_base + sdb->buf_rdptr, to_copy);
+
+		pg_offset += to_copy;
+		if (pg_offset >= PAGE_SIZE) {
+			pg_offset = 0;
+			pg_idx++;
+			pg_idx %= buf->nr_pages;
+		}
+		smb_update_read_ptr(drvdata, to_copy);
+	}
+
+	smb_reset_buffer(drvdata);
+}
+
+static unsigned long smb_update_buffer(struct coresight_device *csdev,
+				       struct perf_output_handle *handle,
+				       void *sink_config)
+{
+	struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct smb_data_buffer *sdb = &drvdata->sdb;
+	struct cs_buffers *buf = sink_config;
+	unsigned long data_size = 0;
+	bool lost = false;
+
+	if (!buf)
+		return 0;
+
+	spin_lock(&drvdata->spinlock);
+
+	/* Don't do anything if another tracer is using this sink. */
+	if (atomic_read(&csdev->refcnt) != 1)
+		goto out;
+
+	smb_disable_hw(drvdata);
+	smb_update_data_size(drvdata);
+
+	/*
+	 * The SMB buffer may be bigger than the space available in the
+	 * perf ring buffer (handle->size). If so advance the offset so
+	 * that we get the latest trace data.
+	 */
+	if (sdb->data_size > handle->size) {
+		smb_update_read_ptr(drvdata, sdb->data_size - handle->size);
+		lost = true;
+	}
+
+	data_size = sdb->data_size;
+	smb_sync_perf_buffer(drvdata, buf, handle->head);
+	if (!buf->snapshot && lost)
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+out:
+	spin_unlock(&drvdata->spinlock);
+
+	return data_size;
+}
+
+static const struct coresight_ops_sink smb_cs_ops = {
+	.enable		= smb_enable,
+	.disable	= smb_disable,
+	.alloc_buffer	= smb_alloc_buffer,
+	.free_buffer	= smb_free_buffer,
+	.update_buffer	= smb_update_buffer,
+};
+
+static const struct coresight_ops cs_ops = {
+	.sink_ops	= &smb_cs_ops,
+};
+
+static int smb_init_data_buffer(struct platform_device *pdev,
+				struct smb_data_buffer *sdb)
+{
+	struct resource *res;
+	void *base;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, SMB_BUF_ADDR_RES);
+	if (!res) {
+		dev_err(&pdev->dev, "SMB device failed to get resource\n");
+		return -EINVAL;
+	}
+
+	sdb->buf_rdptr = 0;
+	sdb->buf_hw_base = FIELD_GET(SMB_BUF_ADDR_LO_MSK, res->start);
+	sdb->buf_size = resource_size(res);
+	if (sdb->buf_size == 0)
+		return -EINVAL;
+
+	/*
+	 * This is a chunk of memory, use classic mapping with better
+	 * performance.
+	 */
+	base = devm_memremap(&pdev->dev, sdb->buf_hw_base, sdb->buf_size,
+				MEMREMAP_WB);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	sdb->buf_base = base;
+
+	return 0;
+}
+
+static void smb_init_hw(struct smb_drv_data *drvdata)
+{
+	smb_disable_hw(drvdata);
+
+	writel(SMB_LB_CFG_LO_DEFAULT, drvdata->base + SMB_LB_CFG_LO_REG);
+	writel(SMB_LB_CFG_HI_DEFAULT, drvdata->base + SMB_LB_CFG_HI_REG);
+	writel(SMB_GLB_CFG_DEFAULT, drvdata->base + SMB_GLB_CFG_REG);
+	writel(SMB_GLB_INT_CFG, drvdata->base + SMB_GLB_INT_REG);
+	writel(SMB_LB_INT_CTRL_CFG, drvdata->base + SMB_LB_INT_CTRL_REG);
+}
+
+static int smb_register_sink(struct platform_device *pdev,
+			     struct smb_drv_data *drvdata)
+{
+	struct coresight_platform_data *pdata = NULL;
+	struct coresight_desc desc = { 0 };
+	int ret;
+
+	pdata = coresight_get_platform_data(&pdev->dev);
+	if (IS_ERR(pdata))
+		return PTR_ERR(pdata);
+
+	desc.type = CORESIGHT_DEV_TYPE_SINK;
+	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+	desc.ops = &cs_ops;
+	desc.pdata = pdata;
+	desc.dev = &pdev->dev;
+	desc.groups = smb_sink_groups;
+	desc.name = coresight_alloc_device_name(&sink_devs, &pdev->dev);
+	if (!desc.name) {
+		dev_err(&pdev->dev, "Failed to alloc coresight device name");
+		return -ENOMEM;
+	}
+	desc.access = CSDEV_ACCESS_IOMEM(drvdata->base);
+
+	drvdata->csdev = coresight_register(&desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	drvdata->miscdev.name = desc.name;
+	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
+	drvdata->miscdev.fops = &smb_fops;
+	ret = misc_register(&drvdata->miscdev);
+	if (ret) {
+		coresight_unregister(drvdata->csdev);
+		dev_err(&pdev->dev, "Failed to register misc, ret=%d\n", ret);
+	}
+
+	return ret;
+}
+
+static void smb_unregister_sink(struct smb_drv_data *drvdata)
+{
+	misc_deregister(&drvdata->miscdev);
+	coresight_unregister(drvdata->csdev);
+}
+
+static int smb_config_inport(struct device *dev, bool enable)
+{
+	u64 func = enable ? 1 : 0;
+	union acpi_object *obj;
+	guid_t guid;
+	u64 rev = 0;
+
+	/*
+	 * Using DSM calls to enable/disable ultrasoc hardwares on
+	 * tracing path, to prevent ultrasoc packet format being exposed.
+	 */
+	if (guid_parse(ULTRASOC_SMB_DSM_UUID, &guid)) {
+		dev_err(dev, "Get GUID failed\n");
+		return -EINVAL;
+	}
+
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &guid, rev, func, NULL);
+	if (!obj) {
+		dev_err(dev, "ACPI handle failed\n");
+		return -ENODEV;
+	}
+
+	ACPI_FREE(obj);
+
+	return 0;
+}
+
+static int smb_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct smb_drv_data *drvdata;
+	int ret;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->base = devm_platform_ioremap_resource(pdev, SMB_REG_ADDR_RES);
+	if (IS_ERR(drvdata->base)) {
+		dev_err(dev, "Failed to ioremap resource\n");
+		return PTR_ERR(drvdata->base);
+	}
+
+	smb_init_hw(drvdata);
+
+	ret = smb_init_data_buffer(pdev, &drvdata->sdb);
+	if (ret) {
+		dev_err(dev, "Failed to init buffer, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = smb_config_inport(dev, true);
+	if (ret)
+		return ret;
+
+	smb_reset_buffer(drvdata);
+	platform_set_drvdata(pdev, drvdata);
+	spin_lock_init(&drvdata->spinlock);
+	drvdata->pid = -1;
+
+	ret = smb_register_sink(pdev, drvdata);
+	if (ret) {
+		smb_config_inport(&pdev->dev, false);
+		dev_err(dev, "Failed to register SMB sink\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int smb_remove(struct platform_device *pdev)
+{
+	struct smb_drv_data *drvdata = platform_get_drvdata(pdev);
+
+	smb_unregister_sink(drvdata);
+
+	smb_config_inport(&pdev->dev, false);
+
+	return 0;
+}
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id ultrasoc_smb_acpi_match[] = {
+	{"HISI03A1", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, ultrasoc_smb_acpi_match);
+#endif
+
+static struct platform_driver smb_driver = {
+	.driver = {
+		.name = "ultrasoc-smb",
+		.acpi_match_table = ACPI_PTR(ultrasoc_smb_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = smb_probe,
+	.remove = smb_remove,
+};
+module_platform_driver(smb_driver);
+
+MODULE_DESCRIPTION("UltraSoc SMB CoreSight driver");
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_AUTHOR("Jonathan Zhou <jonathan.zhouwen@huawei.com>");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.h b/drivers/hwtracing/coresight/ultrasoc-smb.h
new file mode 100644
index 0000000000..82a44c14a8
--- /dev/null
+++ b/drivers/hwtracing/coresight/ultrasoc-smb.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Siemens System Memory Buffer driver.
+ * Copyright(c) 2022, HiSilicon Limited.
+ */
+
+#ifndef _ULTRASOC_SMB_H
+#define _ULTRASOC_SMB_H
+
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+
+/* Offset of SMB global registers */
+#define SMB_GLB_CFG_REG		0x00
+#define SMB_GLB_EN_REG		0x04
+#define SMB_GLB_INT_REG		0x08
+
+/* Offset of SMB logical buffer registers */
+#define SMB_LB_CFG_LO_REG	0x40
+#define SMB_LB_CFG_HI_REG	0x44
+#define SMB_LB_INT_CTRL_REG	0x48
+#define SMB_LB_INT_STS_REG	0x4c
+#define SMB_LB_RD_ADDR_REG	0x5c
+#define SMB_LB_WR_ADDR_REG	0x60
+#define SMB_LB_PURGE_REG	0x64
+
+/* Set global config register */
+#define SMB_GLB_CFG_BURST_LEN_MSK	GENMASK(11, 4)
+#define SMB_GLB_CFG_IDLE_PRD_MSK	GENMASK(15, 12)
+#define SMB_GLB_CFG_MEM_WR_MSK		GENMASK(21, 16)
+#define SMB_GLB_CFG_MEM_RD_MSK		GENMASK(27, 22)
+#define SMB_GLB_CFG_DEFAULT	(FIELD_PREP(SMB_GLB_CFG_BURST_LEN_MSK, 0xf) | \
+				 FIELD_PREP(SMB_GLB_CFG_IDLE_PRD_MSK, 0xf) | \
+				 FIELD_PREP(SMB_GLB_CFG_MEM_WR_MSK, 0x3) | \
+				 FIELD_PREP(SMB_GLB_CFG_MEM_RD_MSK, 0x1b))
+
+#define SMB_GLB_EN_HW_ENABLE	BIT(0)
+
+/* Set global interrupt control register */
+#define SMB_GLB_INT_EN		BIT(0)
+#define SMB_GLB_INT_PULSE	BIT(1) /* Interrupt type: 1 - Pulse */
+#define SMB_GLB_INT_ACT_H	BIT(2) /* Interrupt polarity: 1 - Active high */
+#define SMB_GLB_INT_CFG		(SMB_GLB_INT_EN | SMB_GLB_INT_PULSE | \
+				 SMB_GLB_INT_ACT_H)
+
+/* Set logical buffer config register lower 32 bits */
+#define SMB_LB_CFG_LO_EN		BIT(0)
+#define SMB_LB_CFG_LO_SINGLE_END	BIT(1)
+#define SMB_LB_CFG_LO_INIT		BIT(8)
+#define SMB_LB_CFG_LO_CONT		BIT(11)
+#define SMB_LB_CFG_LO_FLOW_MSK		GENMASK(19, 16)
+#define SMB_LB_CFG_LO_DEFAULT	(SMB_LB_CFG_LO_EN | SMB_LB_CFG_LO_SINGLE_END | \
+				 SMB_LB_CFG_LO_INIT | SMB_LB_CFG_LO_CONT | \
+				 FIELD_PREP(SMB_LB_CFG_LO_FLOW_MSK, 0xf))
+
+/* Set logical buffer config register upper 32 bits */
+#define SMB_LB_CFG_HI_RANGE_UP_MSK	GENMASK(15, 8)
+#define SMB_LB_CFG_HI_DEFAULT	FIELD_PREP(SMB_LB_CFG_HI_RANGE_UP_MSK, 0xff)
+
+/*
+ * Set logical buffer interrupt control register.
+ * The register control the validity of both real-time events and
+ * interrupts. When logical buffer status changes causes to issue
+ * an interrupt at the same time as it issues a real-time event.
+ * Real-time events are used in SMB driver, which needs to get the buffer
+ * status. Interrupts are used in debugger mode.
+ * SMB_LB_INT_CTRL_BUF_NOTE_MASK control which events flags or interrupts
+ * are valid.
+ */
+#define SMB_LB_INT_CTRL_EN		BIT(0)
+#define SMB_LB_INT_CTRL_BUF_NOTE_MSK	GENMASK(11, 8)
+#define SMB_LB_INT_CTRL_CFG	(SMB_LB_INT_CTRL_EN | \
+				 FIELD_PREP(SMB_LB_INT_CTRL_BUF_NOTE_MSK, 0xf))
+
+/* Set logical buffer interrupt status register */
+#define SMB_LB_INT_STS_NOT_EMPTY_MSK	BIT(0)
+#define SMB_LB_INT_STS_BUF_RESET_MSK	GENMASK(3, 0)
+#define SMB_LB_INT_STS_RESET	FIELD_PREP(SMB_LB_INT_STS_BUF_RESET_MSK, 0xf)
+
+#define SMB_LB_PURGE_PURGED	BIT(0)
+
+#define SMB_REG_ADDR_RES	0
+#define SMB_BUF_ADDR_RES	1
+#define SMB_BUF_ADDR_LO_MSK	GENMASK(31, 0)
+
+/**
+ * struct smb_data_buffer - Details of the buffer used by SMB
+ * @buf_base:	Memory mapped base address of SMB.
+ * @buf_hw_base:	SMB buffer start Physical base address, only used 32bits.
+ * @buf_size:	Size of the buffer.
+ * @data_size:	Size of the available trace data for SMB.
+ * @buf_rdptr:	Current read position (index) within the buffer.
+ */
+struct smb_data_buffer {
+	void *buf_base;
+	u32 buf_hw_base;
+	unsigned long buf_size;
+	unsigned long data_size;
+	unsigned long buf_rdptr;
+};
+
+/**
+ * struct smb_drv_data - specifics associated to an SMB component
+ * @base:	Memory mapped base address for SMB component.
+ * @csdev:	Component vitals needed by the framework.
+ * @sdb:	Data buffer for SMB.
+ * @miscdev:	Specifics to handle "/dev/xyz.smb" entry.
+ * @spinlock:	Control data access to one at a time.
+ * @reading:	Synchronise user space access to SMB buffer.
+ * @pid:	Process ID of the process being monitored by the
+ *		session that is using this component.
+ * @mode:	How this SMB is being used, perf mode or sysfs mode.
+ */
+struct smb_drv_data {
+	void __iomem *base;
+	struct coresight_device	*csdev;
+	struct smb_data_buffer sdb;
+	struct miscdevice miscdev;
+	spinlock_t spinlock;
+	bool reading;
+	pid_t pid;
+	enum cs_mode mode;
+};
+
+#endif
diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig
new file mode 100644
index 0000000000..4b6359326e
--- /dev/null
+++ b/drivers/hwtracing/intel_th/Kconfig
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INTEL_TH
+	tristate "Intel(R) Trace Hub controller"
+	depends on HAS_DMA && HAS_IOMEM
+	help
+	  Intel(R) Trace Hub (TH) is a set of hardware blocks (subdevices) that
+	  produce, switch and output trace data from multiple hardware and
+	  software sources over several types of trace output ports encoded
+	  in System Trace Protocol (MIPI STPv2) and is intended to perform
+	  full system debugging.
+
+	  This option enables intel_th bus and common code used by TH
+	  subdevices to interact with each other and hardware and for
+	  platform glue layers to drive Intel TH devices.
+
+	  Say Y here to enable Intel(R) Trace Hub controller support.
+
+if INTEL_TH
+
+config INTEL_TH_PCI
+	tristate "Intel(R) Trace Hub PCI controller"
+	depends on PCI
+	help
+	  Intel(R) Trace Hub may exist as a PCI device. This option enables
+	  support glue layer for PCI-based Intel TH.
+
+	  Say Y here to enable PCI Intel TH support.
+
+config INTEL_TH_ACPI
+	tristate "Intel(R) Trace Hub ACPI controller"
+	depends on ACPI
+	help
+	  Intel(R) Trace Hub may exist as an ACPI device. This option enables
+	  support glue layer for ACPI-based Intel TH. This typically implies
+	  'host debugger' mode, that is, the trace configuration and capture
+	  is handled by an external debug host and corresponding controls will
+	  not be available on the target.
+
+	  Say Y here to enable ACPI Intel TH support.
+
+config INTEL_TH_GTH
+	tristate "Intel(R) Trace Hub Global Trace Hub"
+	help
+	  Global Trace Hub (GTH) is the central component of the
+	  Intel TH infrastructure and acts as a switch for source
+	  and output devices. This driver is required for other
+	  Intel TH subdevices to initialize.
+
+	  Say Y here to enable GTH subdevice of Intel(R) Trace Hub.
+
+config INTEL_TH_STH
+	tristate "Intel(R) Trace Hub Software Trace Hub support"
+	depends on STM
+	help
+	  Software Trace Hub (STH) enables trace data from software
+	  trace sources to be sent out via Intel(R) Trace Hub. It
+	  uses stm class device to interface with its sources.
+
+	  Say Y here to enable STH subdevice of Intel(R) Trace Hub.
+
+config INTEL_TH_MSU
+	tristate "Intel(R) Trace Hub Memory Storage Unit"
+	help
+	  Memory Storage Unit (MSU) trace output device enables
+	  storing STP traces to system memory. It supports single
+	  and multiblock modes of operation and provides read()
+	  and mmap() access to the collected data.
+
+	  Say Y here to enable MSU output device for Intel TH.
+
+config INTEL_TH_PTI
+	tristate "Intel(R) Trace Hub PTI output"
+	help
+	  Parallel Trace Interface unit (PTI) is a trace output device
+	  of Intel TH architecture that facilitates STP trace output via
+	  a PTI port.
+
+	  Say Y to enable PTI output of Intel TH data.
+
+config INTEL_TH_DEBUG
+	bool "Intel(R) Trace Hub debugging"
+	depends on DEBUG_FS
+	help
+	  Say Y here to enable debugging.
+
+endif
diff --git a/drivers/hwtracing/intel_th/Makefile b/drivers/hwtracing/intel_th/Makefile
new file mode 100644
index 0000000000..b63eb8f309
--- /dev/null
+++ b/drivers/hwtracing/intel_th/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_INTEL_TH)		+= intel_th.o
+intel_th-y			:= core.o
+intel_th-$(CONFIG_INTEL_TH_DEBUG) += debug.o
+
+obj-$(CONFIG_INTEL_TH_PCI)	+= intel_th_pci.o
+intel_th_pci-y			:= pci.o
+
+obj-$(CONFIG_INTEL_TH_ACPI)	+= intel_th_acpi.o
+intel_th_acpi-y			:= acpi.o
+
+obj-$(CONFIG_INTEL_TH_GTH)	+= intel_th_gth.o
+intel_th_gth-y			:= gth.o
+
+obj-$(CONFIG_INTEL_TH_STH)	+= intel_th_sth.o
+intel_th_sth-y			:= sth.o
+
+obj-$(CONFIG_INTEL_TH_MSU)	+= intel_th_msu.o
+intel_th_msu-y			:= msu.o
+
+obj-$(CONFIG_INTEL_TH_PTI)	+= intel_th_pti.o
+intel_th_pti-y			:= pti.o
+
+obj-$(CONFIG_INTEL_TH_MSU)	+= intel_th_msu_sink.o
+intel_th_msu_sink-y		:= msu-sink.o
diff --git a/drivers/hwtracing/intel_th/acpi.c b/drivers/hwtracing/intel_th/acpi.c
new file mode 100644
index 0000000000..87f9024e4b
--- /dev/null
+++ b/drivers/hwtracing/intel_th/acpi.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub ACPI driver
+ *
+ * Copyright (C) 2017 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+
+#include "intel_th.h"
+
+#define DRIVER_NAME "intel_th_acpi"
+
+static const struct intel_th_drvdata intel_th_acpi_pch = {
+	.host_mode_only	= 1,
+};
+
+static const struct intel_th_drvdata intel_th_acpi_uncore = {
+	.host_mode_only	= 1,
+};
+
+static const struct acpi_device_id intel_th_acpi_ids[] = {
+	{ "INTC1000",	(kernel_ulong_t)&intel_th_acpi_uncore },
+	{ "INTC1001",	(kernel_ulong_t)&intel_th_acpi_pch },
+	{ "",		0 },
+};
+
+MODULE_DEVICE_TABLE(acpi, intel_th_acpi_ids);
+
+static int intel_th_acpi_probe(struct platform_device *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	struct resource resource[TH_MMIO_END];
+	const struct acpi_device_id *id;
+	struct intel_th *th;
+	int i, r;
+
+	id = acpi_match_device(intel_th_acpi_ids, &pdev->dev);
+	if (!id)
+		return -ENODEV;
+
+	for (i = 0, r = 0; i < pdev->num_resources && r < TH_MMIO_END; i++)
+		if (pdev->resource[i].flags &
+		    (IORESOURCE_IRQ | IORESOURCE_MEM))
+			resource[r++] = pdev->resource[i];
+
+	th = intel_th_alloc(&pdev->dev, (void *)id->driver_data, resource, r);
+	if (IS_ERR(th))
+		return PTR_ERR(th);
+
+	adev->driver_data = th;
+
+	return 0;
+}
+
+static int intel_th_acpi_remove(struct platform_device *pdev)
+{
+	struct intel_th *th = platform_get_drvdata(pdev);
+
+	intel_th_free(th);
+
+	return 0;
+}
+
+static struct platform_driver intel_th_acpi_driver = {
+	.probe		= intel_th_acpi_probe,
+	.remove		= intel_th_acpi_remove,
+	.driver		= {
+		.name			= DRIVER_NAME,
+		.acpi_match_table	= intel_th_acpi_ids,
+	},
+};
+
+module_platform_driver(intel_th_acpi_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub ACPI controller driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@intel.com>");
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
new file mode 100644
index 0000000000..cc7f879bb1
--- /dev/null
+++ b/drivers/hwtracing/intel_th/core.c
@@ -0,0 +1,1086 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub driver core
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/kdev_t.h>
+#include <linux/debugfs.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/dma-mapping.h>
+
+#include "intel_th.h"
+#include "debug.h"
+
+static bool host_mode __read_mostly;
+module_param(host_mode, bool, 0444);
+
+static DEFINE_IDA(intel_th_ida);
+
+static int intel_th_match(struct device *dev, struct device_driver *driver)
+{
+	struct intel_th_driver *thdrv = to_intel_th_driver(driver);
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+
+	if (thdev->type == INTEL_TH_SWITCH &&
+	    (!thdrv->enable || !thdrv->disable))
+		return 0;
+
+	return !strcmp(thdev->name, driver->name);
+}
+
+static int intel_th_child_remove(struct device *dev, void *data)
+{
+	device_release_driver(dev);
+
+	return 0;
+}
+
+static int intel_th_probe(struct device *dev)
+{
+	struct intel_th_driver *thdrv = to_intel_th_driver(dev->driver);
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+	struct intel_th_driver *hubdrv;
+	struct intel_th_device *hub = NULL;
+	int ret;
+
+	if (thdev->type == INTEL_TH_SWITCH)
+		hub = thdev;
+	else if (dev->parent)
+		hub = to_intel_th_device(dev->parent);
+
+	if (!hub || !hub->dev.driver)
+		return -EPROBE_DEFER;
+
+	hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	pm_runtime_set_active(dev);
+	pm_runtime_no_callbacks(dev);
+	pm_runtime_enable(dev);
+
+	ret = thdrv->probe(to_intel_th_device(dev));
+	if (ret)
+		goto out_pm;
+
+	if (thdrv->attr_group) {
+		ret = sysfs_create_group(&thdev->dev.kobj, thdrv->attr_group);
+		if (ret)
+			goto out;
+	}
+
+	if (thdev->type == INTEL_TH_OUTPUT &&
+	    !intel_th_output_assigned(thdev))
+		/* does not talk to hardware */
+		ret = hubdrv->assign(hub, thdev);
+
+out:
+	if (ret)
+		thdrv->remove(thdev);
+
+out_pm:
+	if (ret)
+		pm_runtime_disable(dev);
+
+	return ret;
+}
+
+static void intel_th_device_remove(struct intel_th_device *thdev);
+
+static void intel_th_remove(struct device *dev)
+{
+	struct intel_th_driver *thdrv = to_intel_th_driver(dev->driver);
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+	struct intel_th_device *hub = to_intel_th_hub(thdev);
+
+	if (thdev->type == INTEL_TH_SWITCH) {
+		struct intel_th *th = to_intel_th(hub);
+		int i, lowest;
+
+		/*
+		 * disconnect outputs
+		 *
+		 * intel_th_child_remove returns 0 unconditionally, so there is
+		 * no need to check the return value of device_for_each_child.
+		 */
+		device_for_each_child(dev, thdev, intel_th_child_remove);
+
+		/*
+		 * Remove outputs, that is, hub's children: they are created
+		 * at hub's probe time by having the hub call
+		 * intel_th_output_enable() for each of them.
+		 */
+		for (i = 0, lowest = -1; i < th->num_thdevs; i++) {
+			/*
+			 * Move the non-output devices from higher up the
+			 * th->thdev[] array to lower positions to maintain
+			 * a contiguous array.
+			 */
+			if (th->thdev[i]->type != INTEL_TH_OUTPUT) {
+				if (lowest >= 0) {
+					th->thdev[lowest] = th->thdev[i];
+					th->thdev[i] = NULL;
+					++lowest;
+				}
+
+				continue;
+			}
+
+			if (lowest == -1)
+				lowest = i;
+
+			intel_th_device_remove(th->thdev[i]);
+			th->thdev[i] = NULL;
+		}
+
+		if (lowest >= 0)
+			th->num_thdevs = lowest;
+	}
+
+	if (thdrv->attr_group)
+		sysfs_remove_group(&thdev->dev.kobj, thdrv->attr_group);
+
+	pm_runtime_get_sync(dev);
+
+	thdrv->remove(thdev);
+
+	if (intel_th_output_assigned(thdev)) {
+		struct intel_th_driver *hubdrv =
+			to_intel_th_driver(dev->parent->driver);
+
+		if (hub->dev.driver)
+			/* does not talk to hardware */
+			hubdrv->unassign(hub, thdev);
+	}
+
+	pm_runtime_disable(dev);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+}
+
+static struct bus_type intel_th_bus = {
+	.name		= "intel_th",
+	.match		= intel_th_match,
+	.probe		= intel_th_probe,
+	.remove		= intel_th_remove,
+};
+
+static void intel_th_device_free(struct intel_th_device *thdev);
+
+static void intel_th_device_release(struct device *dev)
+{
+	intel_th_device_free(to_intel_th_device(dev));
+}
+
+static struct device_type intel_th_source_device_type = {
+	.name		= "intel_th_source_device",
+	.release	= intel_th_device_release,
+};
+
+static char *intel_th_output_devnode(const struct device *dev, umode_t *mode,
+				     kuid_t *uid, kgid_t *gid)
+{
+	const struct intel_th_device *thdev = to_intel_th_device(dev);
+	const struct intel_th *th = to_intel_th(thdev);
+	char *node;
+
+	if (thdev->id >= 0)
+		node = kasprintf(GFP_KERNEL, "intel_th%d/%s%d", th->id,
+				 thdev->name, thdev->id);
+	else
+		node = kasprintf(GFP_KERNEL, "intel_th%d/%s", th->id,
+				 thdev->name);
+
+	return node;
+}
+
+static ssize_t port_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+
+	if (thdev->output.port >= 0)
+		return scnprintf(buf, PAGE_SIZE, "%u\n", thdev->output.port);
+
+	return scnprintf(buf, PAGE_SIZE, "unassigned\n");
+}
+
+static DEVICE_ATTR_RO(port);
+
+static void intel_th_trace_prepare(struct intel_th_device *thdev)
+{
+	struct intel_th_device *hub = to_intel_th_hub(thdev);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	if (hub->type != INTEL_TH_SWITCH)
+		return;
+
+	if (thdev->type != INTEL_TH_OUTPUT)
+		return;
+
+	pm_runtime_get_sync(&thdev->dev);
+	hubdrv->prepare(hub, &thdev->output);
+	pm_runtime_put(&thdev->dev);
+}
+
+static int intel_th_output_activate(struct intel_th_device *thdev)
+{
+	struct intel_th_driver *thdrv =
+		to_intel_th_driver_or_null(thdev->dev.driver);
+	struct intel_th *th = to_intel_th(thdev);
+	int ret = 0;
+
+	if (!thdrv)
+		return -ENODEV;
+
+	if (!try_module_get(thdrv->driver.owner))
+		return -ENODEV;
+
+	pm_runtime_get_sync(&thdev->dev);
+
+	if (th->activate)
+		ret = th->activate(th);
+	if (ret)
+		goto fail_put;
+
+	intel_th_trace_prepare(thdev);
+	if (thdrv->activate)
+		ret = thdrv->activate(thdev);
+	else
+		intel_th_trace_enable(thdev);
+
+	if (ret)
+		goto fail_deactivate;
+
+	return 0;
+
+fail_deactivate:
+	if (th->deactivate)
+		th->deactivate(th);
+
+fail_put:
+	pm_runtime_put(&thdev->dev);
+	module_put(thdrv->driver.owner);
+
+	return ret;
+}
+
+static void intel_th_output_deactivate(struct intel_th_device *thdev)
+{
+	struct intel_th_driver *thdrv =
+		to_intel_th_driver_or_null(thdev->dev.driver);
+	struct intel_th *th = to_intel_th(thdev);
+
+	if (!thdrv)
+		return;
+
+	if (thdrv->deactivate)
+		thdrv->deactivate(thdev);
+	else
+		intel_th_trace_disable(thdev);
+
+	if (th->deactivate)
+		th->deactivate(th);
+
+	pm_runtime_put(&thdev->dev);
+	module_put(thdrv->driver.owner);
+}
+
+static ssize_t active_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", thdev->output.active);
+}
+
+static ssize_t active_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t size)
+{
+	struct intel_th_device *thdev = to_intel_th_device(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (!!val != thdev->output.active) {
+		if (val)
+			ret = intel_th_output_activate(thdev);
+		else
+			intel_th_output_deactivate(thdev);
+	}
+
+	return ret ? ret : size;
+}
+
+static DEVICE_ATTR_RW(active);
+
+static struct attribute *intel_th_output_attrs[] = {
+	&dev_attr_port.attr,
+	&dev_attr_active.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(intel_th_output);
+
+static struct device_type intel_th_output_device_type = {
+	.name		= "intel_th_output_device",
+	.groups		= intel_th_output_groups,
+	.release	= intel_th_device_release,
+	.devnode	= intel_th_output_devnode,
+};
+
+static struct device_type intel_th_switch_device_type = {
+	.name		= "intel_th_switch_device",
+	.release	= intel_th_device_release,
+};
+
+static struct device_type *intel_th_device_type[] = {
+	[INTEL_TH_SOURCE]	= &intel_th_source_device_type,
+	[INTEL_TH_OUTPUT]	= &intel_th_output_device_type,
+	[INTEL_TH_SWITCH]	= &intel_th_switch_device_type,
+};
+
+int intel_th_driver_register(struct intel_th_driver *thdrv)
+{
+	if (!thdrv->probe || !thdrv->remove)
+		return -EINVAL;
+
+	thdrv->driver.bus = &intel_th_bus;
+
+	return driver_register(&thdrv->driver);
+}
+EXPORT_SYMBOL_GPL(intel_th_driver_register);
+
+void intel_th_driver_unregister(struct intel_th_driver *thdrv)
+{
+	driver_unregister(&thdrv->driver);
+}
+EXPORT_SYMBOL_GPL(intel_th_driver_unregister);
+
+static struct intel_th_device *
+intel_th_device_alloc(struct intel_th *th, unsigned int type, const char *name,
+		      int id)
+{
+	struct device *parent;
+	struct intel_th_device *thdev;
+
+	if (type == INTEL_TH_OUTPUT)
+		parent = &th->hub->dev;
+	else
+		parent = th->dev;
+
+	thdev = kzalloc(sizeof(*thdev) + strlen(name) + 1, GFP_KERNEL);
+	if (!thdev)
+		return NULL;
+
+	thdev->id = id;
+	thdev->type = type;
+
+	strcpy(thdev->name, name);
+	device_initialize(&thdev->dev);
+	thdev->dev.bus = &intel_th_bus;
+	thdev->dev.type = intel_th_device_type[type];
+	thdev->dev.parent = parent;
+	thdev->dev.dma_mask = parent->dma_mask;
+	thdev->dev.dma_parms = parent->dma_parms;
+	dma_set_coherent_mask(&thdev->dev, parent->coherent_dma_mask);
+	if (id >= 0)
+		dev_set_name(&thdev->dev, "%d-%s%d", th->id, name, id);
+	else
+		dev_set_name(&thdev->dev, "%d-%s", th->id, name);
+
+	return thdev;
+}
+
+static int intel_th_device_add_resources(struct intel_th_device *thdev,
+					 struct resource *res, int nres)
+{
+	struct resource *r;
+
+	r = kmemdup(res, sizeof(*res) * nres, GFP_KERNEL);
+	if (!r)
+		return -ENOMEM;
+
+	thdev->resource = r;
+	thdev->num_resources = nres;
+
+	return 0;
+}
+
+static void intel_th_device_remove(struct intel_th_device *thdev)
+{
+	device_del(&thdev->dev);
+	put_device(&thdev->dev);
+}
+
+static void intel_th_device_free(struct intel_th_device *thdev)
+{
+	kfree(thdev->resource);
+	kfree(thdev);
+}
+
+/*
+ * Intel(R) Trace Hub subdevices
+ */
+static const struct intel_th_subdevice {
+	const char		*name;
+	struct resource		res[3];
+	unsigned		nres;
+	unsigned		type;
+	unsigned		otype;
+	bool			mknode;
+	unsigned		scrpd;
+	int			id;
+} intel_th_subdevices[] = {
+	{
+		.nres	= 1,
+		.res	= {
+			{
+				/* Handle TSCU and CTS from GTH driver */
+				.start	= REG_GTH_OFFSET,
+				.end	= REG_CTS_OFFSET + REG_CTS_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.name	= "gth",
+		.type	= INTEL_TH_SWITCH,
+		.id	= -1,
+	},
+	{
+		.nres	= 2,
+		.res	= {
+			{
+				.start	= REG_MSU_OFFSET,
+				.end	= REG_MSU_OFFSET + REG_MSU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+			{
+				.start	= BUF_MSU_OFFSET,
+				.end	= BUF_MSU_OFFSET + BUF_MSU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.name	= "msc",
+		.id	= 0,
+		.type	= INTEL_TH_OUTPUT,
+		.mknode	= true,
+		.otype	= GTH_MSU,
+		.scrpd	= SCRPD_MEM_IS_PRIM_DEST | SCRPD_MSC0_IS_ENABLED,
+	},
+	{
+		.nres	= 2,
+		.res	= {
+			{
+				.start	= REG_MSU_OFFSET,
+				.end	= REG_MSU_OFFSET + REG_MSU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+			{
+				.start	= BUF_MSU_OFFSET,
+				.end	= BUF_MSU_OFFSET + BUF_MSU_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.name	= "msc",
+		.id	= 1,
+		.type	= INTEL_TH_OUTPUT,
+		.mknode	= true,
+		.otype	= GTH_MSU,
+		.scrpd	= SCRPD_MEM_IS_PRIM_DEST | SCRPD_MSC1_IS_ENABLED,
+	},
+	{
+		.nres	= 2,
+		.res	= {
+			{
+				.start	= REG_STH_OFFSET,
+				.end	= REG_STH_OFFSET + REG_STH_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+			{
+				.start	= TH_MMIO_SW,
+				.end	= 0,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "sth",
+		.type	= INTEL_TH_SOURCE,
+	},
+	{
+		.nres	= 2,
+		.res	= {
+			{
+				.start	= REG_STH_OFFSET,
+				.end	= REG_STH_OFFSET + REG_STH_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+			{
+				.start	= TH_MMIO_RTIT,
+				.end	= 0,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "rtit",
+		.type	= INTEL_TH_SOURCE,
+	},
+	{
+		.nres	= 1,
+		.res	= {
+			{
+				.start	= REG_PTI_OFFSET,
+				.end	= REG_PTI_OFFSET + REG_PTI_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "pti",
+		.type	= INTEL_TH_OUTPUT,
+		.otype	= GTH_PTI,
+		.scrpd	= SCRPD_PTI_IS_PRIM_DEST,
+	},
+	{
+		.nres	= 1,
+		.res	= {
+			{
+				.start	= REG_PTI_OFFSET,
+				.end	= REG_PTI_OFFSET + REG_PTI_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "lpp",
+		.type	= INTEL_TH_OUTPUT,
+		.otype	= GTH_LPP,
+		.scrpd	= SCRPD_PTI_IS_PRIM_DEST,
+	},
+	{
+		.nres	= 1,
+		.res	= {
+			{
+				.start	= REG_DCIH_OFFSET,
+				.end	= REG_DCIH_OFFSET + REG_DCIH_LENGTH - 1,
+				.flags	= IORESOURCE_MEM,
+			},
+		},
+		.id	= -1,
+		.name	= "dcih",
+		.type	= INTEL_TH_OUTPUT,
+	},
+};
+
+#ifdef CONFIG_MODULES
+static void __intel_th_request_hub_module(struct work_struct *work)
+{
+	struct intel_th *th = container_of(work, struct intel_th,
+					   request_module_work);
+
+	request_module("intel_th_%s", th->hub->name);
+}
+
+static int intel_th_request_hub_module(struct intel_th *th)
+{
+	INIT_WORK(&th->request_module_work, __intel_th_request_hub_module);
+	schedule_work(&th->request_module_work);
+
+	return 0;
+}
+
+static void intel_th_request_hub_module_flush(struct intel_th *th)
+{
+	flush_work(&th->request_module_work);
+}
+#else
+static inline int intel_th_request_hub_module(struct intel_th *th)
+{
+	return -EINVAL;
+}
+
+static inline void intel_th_request_hub_module_flush(struct intel_th *th)
+{
+}
+#endif /* CONFIG_MODULES */
+
+static struct intel_th_device *
+intel_th_subdevice_alloc(struct intel_th *th,
+			 const struct intel_th_subdevice *subdev)
+{
+	struct intel_th_device *thdev;
+	struct resource res[3];
+	unsigned int req = 0;
+	int r, err;
+
+	thdev = intel_th_device_alloc(th, subdev->type, subdev->name,
+				      subdev->id);
+	if (!thdev)
+		return ERR_PTR(-ENOMEM);
+
+	thdev->drvdata = th->drvdata;
+
+	memcpy(res, subdev->res,
+	       sizeof(struct resource) * subdev->nres);
+
+	for (r = 0; r < subdev->nres; r++) {
+		struct resource *devres = th->resource;
+		int bar = TH_MMIO_CONFIG;
+
+		/*
+		 * Take .end == 0 to mean 'take the whole bar',
+		 * .start then tells us which bar it is. Default to
+		 * TH_MMIO_CONFIG.
+		 */
+		if (!res[r].end && res[r].flags == IORESOURCE_MEM) {
+			bar = res[r].start;
+			err = -ENODEV;
+			if (bar >= th->num_resources)
+				goto fail_put_device;
+			res[r].start = 0;
+			res[r].end = resource_size(&devres[bar]) - 1;
+		}
+
+		if (res[r].flags & IORESOURCE_MEM) {
+			res[r].start	+= devres[bar].start;
+			res[r].end	+= devres[bar].start;
+
+			dev_dbg(th->dev, "%s:%d @ %pR\n",
+				subdev->name, r, &res[r]);
+		} else if (res[r].flags & IORESOURCE_IRQ) {
+			/*
+			 * Only pass on the IRQ if we have useful interrupts:
+			 * the ones that can be configured via MINTCTL.
+			 */
+			if (INTEL_TH_CAP(th, has_mintctl) && th->irq != -1)
+				res[r].start = th->irq;
+		}
+	}
+
+	err = intel_th_device_add_resources(thdev, res, subdev->nres);
+	if (err)
+		goto fail_put_device;
+
+	if (subdev->type == INTEL_TH_OUTPUT) {
+		if (subdev->mknode)
+			thdev->dev.devt = MKDEV(th->major, th->num_thdevs);
+		thdev->output.type = subdev->otype;
+		thdev->output.port = -1;
+		thdev->output.scratchpad = subdev->scrpd;
+	} else if (subdev->type == INTEL_TH_SWITCH) {
+		thdev->host_mode =
+			INTEL_TH_CAP(th, host_mode_only) ? true : host_mode;
+		th->hub = thdev;
+	}
+
+	err = device_add(&thdev->dev);
+	if (err)
+		goto fail_free_res;
+
+	/* need switch driver to be loaded to enumerate the rest */
+	if (subdev->type == INTEL_TH_SWITCH && !req) {
+		err = intel_th_request_hub_module(th);
+		if (!err)
+			req++;
+	}
+
+	return thdev;
+
+fail_free_res:
+	kfree(thdev->resource);
+
+fail_put_device:
+	put_device(&thdev->dev);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * intel_th_output_enable() - find and enable a device for a given output type
+ * @th:		Intel TH instance
+ * @otype:	output type
+ *
+ * Go through the unallocated output devices, find the first one whos type
+ * matches @otype and instantiate it. These devices are removed when the hub
+ * device is removed, see intel_th_remove().
+ */
+int intel_th_output_enable(struct intel_th *th, unsigned int otype)
+{
+	struct intel_th_device *thdev;
+	int src = 0, dst = 0;
+
+	for (src = 0, dst = 0; dst <= th->num_thdevs; src++, dst++) {
+		for (; src < ARRAY_SIZE(intel_th_subdevices); src++) {
+			if (intel_th_subdevices[src].type != INTEL_TH_OUTPUT)
+				continue;
+
+			if (intel_th_subdevices[src].otype != otype)
+				continue;
+
+			break;
+		}
+
+		/* no unallocated matching subdevices */
+		if (src == ARRAY_SIZE(intel_th_subdevices))
+			return -ENODEV;
+
+		for (; dst < th->num_thdevs; dst++) {
+			if (th->thdev[dst]->type != INTEL_TH_OUTPUT)
+				continue;
+
+			if (th->thdev[dst]->output.type != otype)
+				continue;
+
+			break;
+		}
+
+		/*
+		 * intel_th_subdevices[src] matches our requirements and is
+		 * not matched in th::thdev[]
+		 */
+		if (dst == th->num_thdevs)
+			goto found;
+	}
+
+	return -ENODEV;
+
+found:
+	thdev = intel_th_subdevice_alloc(th, &intel_th_subdevices[src]);
+	if (IS_ERR(thdev))
+		return PTR_ERR(thdev);
+
+	th->thdev[th->num_thdevs++] = thdev;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_th_output_enable);
+
+static int intel_th_populate(struct intel_th *th)
+{
+	int src;
+
+	/* create devices for each intel_th_subdevice */
+	for (src = 0; src < ARRAY_SIZE(intel_th_subdevices); src++) {
+		const struct intel_th_subdevice *subdev =
+			&intel_th_subdevices[src];
+		struct intel_th_device *thdev;
+
+		/* only allow SOURCE and SWITCH devices in host mode */
+		if ((INTEL_TH_CAP(th, host_mode_only) || host_mode) &&
+		    subdev->type == INTEL_TH_OUTPUT)
+			continue;
+
+		/*
+		 * don't enable port OUTPUTs in this path; SWITCH enables them
+		 * via intel_th_output_enable()
+		 */
+		if (subdev->type == INTEL_TH_OUTPUT &&
+		    subdev->otype != GTH_NONE)
+			continue;
+
+		thdev = intel_th_subdevice_alloc(th, subdev);
+		/* note: caller should free subdevices from th::thdev[] */
+		if (IS_ERR(thdev)) {
+			/* ENODEV for individual subdevices is allowed */
+			if (PTR_ERR(thdev) == -ENODEV)
+				continue;
+
+			return PTR_ERR(thdev);
+		}
+
+		th->thdev[th->num_thdevs++] = thdev;
+	}
+
+	return 0;
+}
+
+static int intel_th_output_open(struct inode *inode, struct file *file)
+{
+	const struct file_operations *fops;
+	struct intel_th_driver *thdrv;
+	struct device *dev;
+	int err;
+
+	dev = bus_find_device_by_devt(&intel_th_bus, inode->i_rdev);
+	if (!dev || !dev->driver)
+		return -ENODEV;
+
+	thdrv = to_intel_th_driver(dev->driver);
+	fops = fops_get(thdrv->fops);
+	if (!fops)
+		return -ENODEV;
+
+	replace_fops(file, fops);
+
+	file->private_data = to_intel_th_device(dev);
+
+	if (file->f_op->open) {
+		err = file->f_op->open(inode, file);
+		return err;
+	}
+
+	return 0;
+}
+
+static const struct file_operations intel_th_output_fops = {
+	.open	= intel_th_output_open,
+	.llseek	= noop_llseek,
+};
+
+static irqreturn_t intel_th_irq(int irq, void *data)
+{
+	struct intel_th *th = data;
+	irqreturn_t ret = IRQ_NONE;
+	struct intel_th_driver *d;
+	int i;
+
+	for (i = 0; i < th->num_thdevs; i++) {
+		if (th->thdev[i]->type != INTEL_TH_OUTPUT)
+			continue;
+
+		d = to_intel_th_driver(th->thdev[i]->dev.driver);
+		if (d && d->irq)
+			ret |= d->irq(th->thdev[i]);
+	}
+
+	return ret;
+}
+
+/**
+ * intel_th_alloc() - allocate a new Intel TH device and its subdevices
+ * @dev:	parent device
+ * @devres:	resources indexed by th_mmio_idx
+ * @irq:	irq number
+ */
+struct intel_th *
+intel_th_alloc(struct device *dev, const struct intel_th_drvdata *drvdata,
+	       struct resource *devres, unsigned int ndevres)
+{
+	int err, r, nr_mmios = 0;
+	struct intel_th *th;
+
+	th = kzalloc(sizeof(*th), GFP_KERNEL);
+	if (!th)
+		return ERR_PTR(-ENOMEM);
+
+	th->id = ida_simple_get(&intel_th_ida, 0, 0, GFP_KERNEL);
+	if (th->id < 0) {
+		err = th->id;
+		goto err_alloc;
+	}
+
+	th->major = __register_chrdev(0, 0, TH_POSSIBLE_OUTPUTS,
+				      "intel_th/output", &intel_th_output_fops);
+	if (th->major < 0) {
+		err = th->major;
+		goto err_ida;
+	}
+	th->irq = -1;
+	th->dev = dev;
+	th->drvdata = drvdata;
+
+	for (r = 0; r < ndevres; r++)
+		switch (devres[r].flags & IORESOURCE_TYPE_BITS) {
+		case IORESOURCE_MEM:
+			th->resource[nr_mmios++] = devres[r];
+			break;
+		case IORESOURCE_IRQ:
+			err = devm_request_irq(dev, devres[r].start,
+					       intel_th_irq, IRQF_SHARED,
+					       dev_name(dev), th);
+			if (err)
+				goto err_chrdev;
+
+			if (th->irq == -1)
+				th->irq = devres[r].start;
+			th->num_irqs++;
+			break;
+		default:
+			dev_warn(dev, "Unknown resource type %lx\n",
+				 devres[r].flags);
+			break;
+		}
+
+	th->num_resources = nr_mmios;
+
+	dev_set_drvdata(dev, th);
+
+	pm_runtime_no_callbacks(dev);
+	pm_runtime_put(dev);
+	pm_runtime_allow(dev);
+
+	err = intel_th_populate(th);
+	if (err) {
+		/* free the subdevices and undo everything */
+		intel_th_free(th);
+		return ERR_PTR(err);
+	}
+
+	return th;
+
+err_chrdev:
+	__unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS,
+			    "intel_th/output");
+
+err_ida:
+	ida_simple_remove(&intel_th_ida, th->id);
+
+err_alloc:
+	kfree(th);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(intel_th_alloc);
+
+void intel_th_free(struct intel_th *th)
+{
+	int i;
+
+	intel_th_request_hub_module_flush(th);
+
+	intel_th_device_remove(th->hub);
+	for (i = 0; i < th->num_thdevs; i++) {
+		if (th->thdev[i] != th->hub)
+			intel_th_device_remove(th->thdev[i]);
+		th->thdev[i] = NULL;
+	}
+
+	th->num_thdevs = 0;
+
+	for (i = 0; i < th->num_irqs; i++)
+		devm_free_irq(th->dev, th->irq + i, th);
+
+	pm_runtime_get_sync(th->dev);
+	pm_runtime_forbid(th->dev);
+
+	__unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS,
+			    "intel_th/output");
+
+	ida_simple_remove(&intel_th_ida, th->id);
+
+	kfree(th);
+}
+EXPORT_SYMBOL_GPL(intel_th_free);
+
+/**
+ * intel_th_trace_enable() - enable tracing for an output device
+ * @thdev:	output device that requests tracing be enabled
+ */
+int intel_th_trace_enable(struct intel_th_device *thdev)
+{
+	struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	if (WARN_ON_ONCE(hub->type != INTEL_TH_SWITCH))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT))
+		return -EINVAL;
+
+	pm_runtime_get_sync(&thdev->dev);
+	hubdrv->enable(hub, &thdev->output);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_th_trace_enable);
+
+/**
+ * intel_th_trace_switch() - execute a switch sequence
+ * @thdev:	output device that requests tracing switch
+ */
+int intel_th_trace_switch(struct intel_th_device *thdev)
+{
+	struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	if (WARN_ON_ONCE(hub->type != INTEL_TH_SWITCH))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT))
+		return -EINVAL;
+
+	hubdrv->trig_switch(hub, &thdev->output);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_th_trace_switch);
+
+/**
+ * intel_th_trace_disable() - disable tracing for an output device
+ * @thdev:	output device that requests tracing be disabled
+ */
+int intel_th_trace_disable(struct intel_th_device *thdev)
+{
+	struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+
+	WARN_ON_ONCE(hub->type != INTEL_TH_SWITCH);
+	if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT))
+		return -EINVAL;
+
+	hubdrv->disable(hub, &thdev->output);
+	pm_runtime_put(&thdev->dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_th_trace_disable);
+
+int intel_th_set_output(struct intel_th_device *thdev,
+			unsigned int master)
+{
+	struct intel_th_device *hub = to_intel_th_hub(thdev);
+	struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver);
+	int ret;
+
+	/* In host mode, this is up to the external debugger, do nothing. */
+	if (hub->host_mode)
+		return 0;
+
+	/*
+	 * hub is instantiated together with the source device that
+	 * calls here, so guaranteed to be present.
+	 */
+	hubdrv = to_intel_th_driver(hub->dev.driver);
+	if (!hubdrv || !try_module_get(hubdrv->driver.owner))
+		return -EINVAL;
+
+	if (!hubdrv->set_output) {
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+	ret = hubdrv->set_output(hub, master);
+
+out:
+	module_put(hubdrv->driver.owner);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(intel_th_set_output);
+
+static int __init intel_th_init(void)
+{
+	intel_th_debug_init();
+
+	return bus_register(&intel_th_bus);
+}
+subsys_initcall(intel_th_init);
+
+static void __exit intel_th_exit(void)
+{
+	intel_th_debug_done();
+
+	bus_unregister(&intel_th_bus);
+}
+module_exit(intel_th_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub controller driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/intel_th/debug.c b/drivers/hwtracing/intel_th/debug.c
new file mode 100644
index 0000000000..ff79063118
--- /dev/null
+++ b/drivers/hwtracing/intel_th/debug.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub driver debugging
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+
+#include "intel_th.h"
+#include "debug.h"
+
+struct dentry *intel_th_dbg;
+
+void intel_th_debug_init(void)
+{
+	intel_th_dbg = debugfs_create_dir("intel_th", NULL);
+	if (IS_ERR(intel_th_dbg))
+		intel_th_dbg = NULL;
+}
+
+void intel_th_debug_done(void)
+{
+	debugfs_remove(intel_th_dbg);
+	intel_th_dbg = NULL;
+}
diff --git a/drivers/hwtracing/intel_th/debug.h b/drivers/hwtracing/intel_th/debug.h
new file mode 100644
index 0000000000..78bd7e4bf9
--- /dev/null
+++ b/drivers/hwtracing/intel_th/debug.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub driver debugging
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_DEBUG_H__
+#define __INTEL_TH_DEBUG_H__
+
+#ifdef CONFIG_INTEL_TH_DEBUG
+extern struct dentry *intel_th_dbg;
+
+void intel_th_debug_init(void);
+void intel_th_debug_done(void);
+#else
+static inline void intel_th_debug_init(void)
+{
+}
+
+static inline void intel_th_debug_done(void)
+{
+}
+#endif
+
+#endif /* __INTEL_TH_DEBUG_H__ */
diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c
new file mode 100644
index 0000000000..b3308934a6
--- /dev/null
+++ b/drivers/hwtracing/intel_th/gth.c
@@ -0,0 +1,850 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub Global Trace Hub
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/pm_runtime.h>
+
+#include "intel_th.h"
+#include "gth.h"
+
+struct gth_device;
+
+/**
+ * struct gth_output - GTH view on an output port
+ * @gth:	backlink to the GTH device
+ * @output:	link to output device's output descriptor
+ * @index:	output port number
+ * @port_type:	one of GTH_* port type values
+ * @master:	bitmap of masters configured for this output
+ */
+struct gth_output {
+	struct gth_device	*gth;
+	struct intel_th_output	*output;
+	unsigned int		index;
+	unsigned int		port_type;
+	DECLARE_BITMAP(master, TH_CONFIGURABLE_MASTERS + 1);
+};
+
+/**
+ * struct gth_device - GTH device
+ * @dev:	driver core's device
+ * @base:	register window base address
+ * @output_group:	attributes describing output ports
+ * @master_group:	attributes describing master assignments
+ * @output:		output ports
+ * @master:		master/output port assignments
+ * @gth_lock:		serializes accesses to GTH bits
+ */
+struct gth_device {
+	struct device		*dev;
+	void __iomem		*base;
+
+	struct attribute_group	output_group;
+	struct attribute_group	master_group;
+	struct gth_output	output[TH_POSSIBLE_OUTPUTS];
+	signed char		master[TH_CONFIGURABLE_MASTERS + 1];
+	spinlock_t		gth_lock;
+};
+
+static void gth_output_set(struct gth_device *gth, int port,
+			   unsigned int config)
+{
+	unsigned long reg = port & 4 ? REG_GTH_GTHOPT1 : REG_GTH_GTHOPT0;
+	u32 val;
+	int shift = (port & 3) * 8;
+
+	val = ioread32(gth->base + reg);
+	val &= ~(0xff << shift);
+	val |= config << shift;
+	iowrite32(val, gth->base + reg);
+}
+
+static unsigned int gth_output_get(struct gth_device *gth, int port)
+{
+	unsigned long reg = port & 4 ? REG_GTH_GTHOPT1 : REG_GTH_GTHOPT0;
+	u32 val;
+	int shift = (port & 3) * 8;
+
+	val = ioread32(gth->base + reg);
+	val &= 0xff << shift;
+	val >>= shift;
+
+	return val;
+}
+
+static void gth_smcfreq_set(struct gth_device *gth, int port,
+			    unsigned int freq)
+{
+	unsigned long reg = REG_GTH_SMCR0 + ((port / 2) * 4);
+	int shift = (port & 1) * 16;
+	u32 val;
+
+	val = ioread32(gth->base + reg);
+	val &= ~(0xffff << shift);
+	val |= freq << shift;
+	iowrite32(val, gth->base + reg);
+}
+
+static unsigned int gth_smcfreq_get(struct gth_device *gth, int port)
+{
+	unsigned long reg = REG_GTH_SMCR0 + ((port / 2) * 4);
+	int shift = (port & 1) * 16;
+	u32 val;
+
+	val = ioread32(gth->base + reg);
+	val &= 0xffff << shift;
+	val >>= shift;
+
+	return val;
+}
+
+/*
+ * "masters" attribute group
+ */
+
+struct master_attribute {
+	struct device_attribute	attr;
+	struct gth_device	*gth;
+	unsigned int		master;
+};
+
+static void
+gth_master_set(struct gth_device *gth, unsigned int master, int port)
+{
+	unsigned int reg = REG_GTH_SWDEST0 + ((master >> 1) & ~3u);
+	unsigned int shift = (master & 0x7) * 4;
+	u32 val;
+
+	if (master >= 256) {
+		reg = REG_GTH_GSWTDEST;
+		shift = 0;
+	}
+
+	val = ioread32(gth->base + reg);
+	val &= ~(0xf << shift);
+	if (port >= 0)
+		val |= (0x8 | port) << shift;
+	iowrite32(val, gth->base + reg);
+}
+
+static ssize_t master_attr_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct master_attribute *ma =
+		container_of(attr, struct master_attribute, attr);
+	struct gth_device *gth = ma->gth;
+	size_t count;
+	int port;
+
+	spin_lock(&gth->gth_lock);
+	port = gth->master[ma->master];
+	spin_unlock(&gth->gth_lock);
+
+	if (port >= 0)
+		count = snprintf(buf, PAGE_SIZE, "%x\n", port);
+	else
+		count = snprintf(buf, PAGE_SIZE, "disabled\n");
+
+	return count;
+}
+
+static ssize_t master_attr_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct master_attribute *ma =
+		container_of(attr, struct master_attribute, attr);
+	struct gth_device *gth = ma->gth;
+	int old_port, port;
+
+	if (kstrtoint(buf, 10, &port) < 0)
+		return -EINVAL;
+
+	if (port >= TH_POSSIBLE_OUTPUTS || port < -1)
+		return -EINVAL;
+
+	spin_lock(&gth->gth_lock);
+
+	/* disconnect from the previous output port, if any */
+	old_port = gth->master[ma->master];
+	if (old_port >= 0) {
+		gth->master[ma->master] = -1;
+		clear_bit(ma->master, gth->output[old_port].master);
+
+		/*
+		 * if the port is active, program this setting,
+		 * implies that runtime PM is on
+		 */
+		if (gth->output[old_port].output->active)
+			gth_master_set(gth, ma->master, -1);
+	}
+
+	/* connect to the new output port, if any */
+	if (port >= 0) {
+		/* check if there's a driver for this port */
+		if (!gth->output[port].output) {
+			count = -ENODEV;
+			goto unlock;
+		}
+
+		set_bit(ma->master, gth->output[port].master);
+
+		/* if the port is active, program this setting, see above */
+		if (gth->output[port].output->active)
+			gth_master_set(gth, ma->master, port);
+	}
+
+	gth->master[ma->master] = port;
+
+unlock:
+	spin_unlock(&gth->gth_lock);
+
+	return count;
+}
+
+struct output_attribute {
+	struct device_attribute attr;
+	struct gth_device	*gth;
+	unsigned int		port;
+	unsigned int		parm;
+};
+
+#define OUTPUT_PARM(_name, _mask, _r, _w, _what)			\
+	[TH_OUTPUT_PARM(_name)] = { .name = __stringify(_name),		\
+				    .get = gth_ ## _what ## _get,	\
+				    .set = gth_ ## _what ## _set,	\
+				    .mask = (_mask),			\
+				    .readable = (_r),			\
+				    .writable = (_w) }
+
+static const struct output_parm {
+	const char	*name;
+	unsigned int	(*get)(struct gth_device *gth, int port);
+	void		(*set)(struct gth_device *gth, int port,
+			       unsigned int val);
+	unsigned int	mask;
+	unsigned int	readable : 1,
+			writable : 1;
+} output_parms[] = {
+	OUTPUT_PARM(port,	0x7,	1, 0, output),
+	OUTPUT_PARM(null,	BIT(3),	1, 1, output),
+	OUTPUT_PARM(drop,	BIT(4),	1, 1, output),
+	OUTPUT_PARM(reset,	BIT(5),	1, 0, output),
+	OUTPUT_PARM(flush,	BIT(7),	0, 1, output),
+	OUTPUT_PARM(smcfreq,	0xffff,	1, 1, smcfreq),
+};
+
+static void
+gth_output_parm_set(struct gth_device *gth, int port, unsigned int parm,
+		    unsigned int val)
+{
+	unsigned int config = output_parms[parm].get(gth, port);
+	unsigned int mask = output_parms[parm].mask;
+	unsigned int shift = __ffs(mask);
+
+	config &= ~mask;
+	config |= (val << shift) & mask;
+	output_parms[parm].set(gth, port, config);
+}
+
+static unsigned int
+gth_output_parm_get(struct gth_device *gth, int port, unsigned int parm)
+{
+	unsigned int config = output_parms[parm].get(gth, port);
+	unsigned int mask = output_parms[parm].mask;
+	unsigned int shift = __ffs(mask);
+
+	config &= mask;
+	config >>= shift;
+	return config;
+}
+
+/*
+ * Reset outputs and sources
+ */
+static int intel_th_gth_reset(struct gth_device *gth)
+{
+	u32 reg;
+	int port, i;
+
+	reg = ioread32(gth->base + REG_GTH_SCRPD0);
+	if (reg & SCRPD_DEBUGGER_IN_USE)
+		return -EBUSY;
+
+	/* Always save/restore STH and TU registers in S0ix entry/exit */
+	reg |= SCRPD_STH_IS_ENABLED | SCRPD_TRIGGER_IS_ENABLED;
+	iowrite32(reg, gth->base + REG_GTH_SCRPD0);
+
+	/* output ports */
+	for (port = 0; port < 8; port++) {
+		if (gth_output_parm_get(gth, port, TH_OUTPUT_PARM(port)) ==
+		    GTH_NONE)
+			continue;
+
+		gth_output_set(gth, port, 0);
+		gth_smcfreq_set(gth, port, 16);
+	}
+	/* disable overrides */
+	iowrite32(0, gth->base + REG_GTH_DESTOVR);
+
+	/* masters swdest_0~31 and gswdest */
+	for (i = 0; i < 33; i++)
+		iowrite32(0, gth->base + REG_GTH_SWDEST0 + i * 4);
+
+	/* sources */
+	iowrite32(0, gth->base + REG_GTH_SCR);
+	iowrite32(0xfc, gth->base + REG_GTH_SCR2);
+
+	/* setup CTS for single trigger */
+	iowrite32(CTS_EVENT_ENABLE_IF_ANYTHING, gth->base + REG_CTS_C0S0_EN);
+	iowrite32(CTS_ACTION_CONTROL_SET_STATE(CTS_STATE_IDLE) |
+		  CTS_ACTION_CONTROL_TRIGGER, gth->base + REG_CTS_C0S0_ACT);
+
+	return 0;
+}
+
+/*
+ * "outputs" attribute group
+ */
+
+static ssize_t output_attr_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct output_attribute *oa =
+		container_of(attr, struct output_attribute, attr);
+	struct gth_device *gth = oa->gth;
+	size_t count;
+
+	pm_runtime_get_sync(dev);
+
+	spin_lock(&gth->gth_lock);
+	count = snprintf(buf, PAGE_SIZE, "%x\n",
+			 gth_output_parm_get(gth, oa->port, oa->parm));
+	spin_unlock(&gth->gth_lock);
+
+	pm_runtime_put(dev);
+
+	return count;
+}
+
+static ssize_t output_attr_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct output_attribute *oa =
+		container_of(attr, struct output_attribute, attr);
+	struct gth_device *gth = oa->gth;
+	unsigned int config;
+
+	if (kstrtouint(buf, 16, &config) < 0)
+		return -EINVAL;
+
+	pm_runtime_get_sync(dev);
+
+	spin_lock(&gth->gth_lock);
+	gth_output_parm_set(gth, oa->port, oa->parm, config);
+	spin_unlock(&gth->gth_lock);
+
+	pm_runtime_put(dev);
+
+	return count;
+}
+
+static int intel_th_master_attributes(struct gth_device *gth)
+{
+	struct master_attribute *master_attrs;
+	struct attribute **attrs;
+	int i, nattrs = TH_CONFIGURABLE_MASTERS + 2;
+
+	attrs = devm_kcalloc(gth->dev, nattrs, sizeof(void *), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+
+	master_attrs = devm_kcalloc(gth->dev, nattrs,
+				    sizeof(struct master_attribute),
+				    GFP_KERNEL);
+	if (!master_attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < TH_CONFIGURABLE_MASTERS + 1; i++) {
+		char *name;
+
+		name = devm_kasprintf(gth->dev, GFP_KERNEL, "%d%s", i,
+				      i == TH_CONFIGURABLE_MASTERS ? "+" : "");
+		if (!name)
+			return -ENOMEM;
+
+		master_attrs[i].attr.attr.name = name;
+		master_attrs[i].attr.attr.mode = S_IRUGO | S_IWUSR;
+		master_attrs[i].attr.show = master_attr_show;
+		master_attrs[i].attr.store = master_attr_store;
+
+		sysfs_attr_init(&master_attrs[i].attr.attr);
+		attrs[i] = &master_attrs[i].attr.attr;
+
+		master_attrs[i].gth = gth;
+		master_attrs[i].master = i;
+	}
+
+	gth->master_group.name	= "masters";
+	gth->master_group.attrs = attrs;
+
+	return sysfs_create_group(&gth->dev->kobj, &gth->master_group);
+}
+
+static int intel_th_output_attributes(struct gth_device *gth)
+{
+	struct output_attribute *out_attrs;
+	struct attribute **attrs;
+	int i, j, nouts = TH_POSSIBLE_OUTPUTS;
+	int nparms = ARRAY_SIZE(output_parms);
+	int nattrs = nouts * nparms + 1;
+
+	attrs = devm_kcalloc(gth->dev, nattrs, sizeof(void *), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+
+	out_attrs = devm_kcalloc(gth->dev, nattrs,
+				 sizeof(struct output_attribute),
+				 GFP_KERNEL);
+	if (!out_attrs)
+		return -ENOMEM;
+
+	for (i = 0; i < nouts; i++) {
+		for (j = 0; j < nparms; j++) {
+			unsigned int idx = i * nparms + j;
+			char *name;
+
+			name = devm_kasprintf(gth->dev, GFP_KERNEL, "%d_%s", i,
+					      output_parms[j].name);
+			if (!name)
+				return -ENOMEM;
+
+			out_attrs[idx].attr.attr.name = name;
+
+			if (output_parms[j].readable) {
+				out_attrs[idx].attr.attr.mode |= S_IRUGO;
+				out_attrs[idx].attr.show = output_attr_show;
+			}
+
+			if (output_parms[j].writable) {
+				out_attrs[idx].attr.attr.mode |= S_IWUSR;
+				out_attrs[idx].attr.store = output_attr_store;
+			}
+
+			sysfs_attr_init(&out_attrs[idx].attr.attr);
+			attrs[idx] = &out_attrs[idx].attr.attr;
+
+			out_attrs[idx].gth = gth;
+			out_attrs[idx].port = i;
+			out_attrs[idx].parm = j;
+		}
+	}
+
+	gth->output_group.name	= "outputs";
+	gth->output_group.attrs = attrs;
+
+	return sysfs_create_group(&gth->dev->kobj, &gth->output_group);
+}
+
+/**
+ * intel_th_gth_stop() - stop tracing to an output device
+ * @gth:		GTH device
+ * @output:		output device's descriptor
+ * @capture_done:	set when no more traces will be captured
+ *
+ * This will stop tracing using force storeEn off signal and wait for the
+ * pipelines to be empty for the corresponding output port.
+ */
+static void intel_th_gth_stop(struct gth_device *gth,
+			      struct intel_th_output *output,
+			      bool capture_done)
+{
+	struct intel_th_device *outdev =
+		container_of(output, struct intel_th_device, output);
+	struct intel_th_driver *outdrv =
+		to_intel_th_driver(outdev->dev.driver);
+	unsigned long count;
+	u32 reg;
+	u32 scr2 = 0xfc | (capture_done ? 1 : 0);
+
+	iowrite32(0, gth->base + REG_GTH_SCR);
+	iowrite32(scr2, gth->base + REG_GTH_SCR2);
+
+	/* wait on pipeline empty for the given port */
+	for (reg = 0, count = GTH_PLE_WAITLOOP_DEPTH;
+	     count && !(reg & BIT(output->port)); count--) {
+		reg = ioread32(gth->base + REG_GTH_STAT);
+		cpu_relax();
+	}
+
+	if (!count)
+		dev_dbg(gth->dev, "timeout waiting for GTH[%d] PLE\n",
+			output->port);
+
+	/* wait on output piepline empty */
+	if (outdrv->wait_empty)
+		outdrv->wait_empty(outdev);
+
+	/* clear force capture done for next captures */
+	iowrite32(0xfc, gth->base + REG_GTH_SCR2);
+}
+
+/**
+ * intel_th_gth_start() - start tracing to an output device
+ * @gth:	GTH device
+ * @output:	output device's descriptor
+ *
+ * This will start tracing using force storeEn signal.
+ */
+static void intel_th_gth_start(struct gth_device *gth,
+			       struct intel_th_output *output)
+{
+	u32 scr = 0xfc0000;
+
+	if (output->multiblock)
+		scr |= 0xff;
+
+	iowrite32(scr, gth->base + REG_GTH_SCR);
+	iowrite32(0, gth->base + REG_GTH_SCR2);
+}
+
+/**
+ * intel_th_gth_disable() - disable tracing to an output device
+ * @thdev:	GTH device
+ * @output:	output device's descriptor
+ *
+ * This will deconfigure all masters set to output to this device,
+ * disable tracing using force storeEn off signal and wait for the
+ * "pipeline empty" bit for corresponding output port.
+ */
+static void intel_th_gth_disable(struct intel_th_device *thdev,
+				 struct intel_th_output *output)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	int master;
+	u32 reg;
+
+	spin_lock(&gth->gth_lock);
+	output->active = false;
+
+	for_each_set_bit(master, gth->output[output->port].master,
+			 TH_CONFIGURABLE_MASTERS + 1) {
+		gth_master_set(gth, master, -1);
+	}
+	spin_unlock(&gth->gth_lock);
+
+	intel_th_gth_stop(gth, output, true);
+
+	reg = ioread32(gth->base + REG_GTH_SCRPD0);
+	reg &= ~output->scratchpad;
+	iowrite32(reg, gth->base + REG_GTH_SCRPD0);
+}
+
+static void gth_tscu_resync(struct gth_device *gth)
+{
+	u32 reg;
+
+	reg = ioread32(gth->base + REG_TSCU_TSUCTRL);
+	reg &= ~TSUCTRL_CTCRESYNC;
+	iowrite32(reg, gth->base + REG_TSCU_TSUCTRL);
+}
+
+static void intel_th_gth_prepare(struct intel_th_device *thdev,
+				 struct intel_th_output *output)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	int count;
+
+	/*
+	 * Wait until the output port is in reset before we start
+	 * programming it.
+	 */
+	for (count = GTH_PLE_WAITLOOP_DEPTH;
+	     count && !(gth_output_get(gth, output->port) & BIT(5)); count--)
+		cpu_relax();
+}
+
+/**
+ * intel_th_gth_enable() - enable tracing to an output device
+ * @thdev:	GTH device
+ * @output:	output device's descriptor
+ *
+ * This will configure all masters set to output to this device and
+ * enable tracing using force storeEn signal.
+ */
+static void intel_th_gth_enable(struct intel_th_device *thdev,
+				struct intel_th_output *output)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	struct intel_th *th = to_intel_th(thdev);
+	int master;
+	u32 scrpd;
+
+	spin_lock(&gth->gth_lock);
+	for_each_set_bit(master, gth->output[output->port].master,
+			 TH_CONFIGURABLE_MASTERS + 1) {
+		gth_master_set(gth, master, output->port);
+	}
+
+	output->active = true;
+	spin_unlock(&gth->gth_lock);
+
+	if (INTEL_TH_CAP(th, tscu_enable))
+		gth_tscu_resync(gth);
+
+	scrpd = ioread32(gth->base + REG_GTH_SCRPD0);
+	scrpd |= output->scratchpad;
+	iowrite32(scrpd, gth->base + REG_GTH_SCRPD0);
+
+	intel_th_gth_start(gth, output);
+}
+
+/**
+ * intel_th_gth_switch() - execute a switch sequence
+ * @thdev:	GTH device
+ * @output:	output device's descriptor
+ *
+ * This will execute a switch sequence that will trigger a switch window
+ * when tracing to MSC in multi-block mode.
+ */
+static void intel_th_gth_switch(struct intel_th_device *thdev,
+				struct intel_th_output *output)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	unsigned long count;
+	u32 reg;
+
+	/* trigger */
+	iowrite32(0, gth->base + REG_CTS_CTL);
+	iowrite32(CTS_CTL_SEQUENCER_ENABLE, gth->base + REG_CTS_CTL);
+	/* wait on trigger status */
+	for (reg = 0, count = CTS_TRIG_WAITLOOP_DEPTH;
+	     count && !(reg & BIT(4)); count--) {
+		reg = ioread32(gth->base + REG_CTS_STAT);
+		cpu_relax();
+	}
+	if (!count)
+		dev_dbg(&thdev->dev, "timeout waiting for CTS Trigger\n");
+
+	/* De-assert the trigger */
+	iowrite32(0, gth->base + REG_CTS_CTL);
+
+	intel_th_gth_stop(gth, output, false);
+	intel_th_gth_start(gth, output);
+}
+
+/**
+ * intel_th_gth_assign() - assign output device to a GTH output port
+ * @thdev:	GTH device
+ * @othdev:	output device
+ *
+ * This will match a given output device parameters against present
+ * output ports on the GTH and fill out relevant bits in output device's
+ * descriptor.
+ *
+ * Return:	0 on success, -errno on error.
+ */
+static int intel_th_gth_assign(struct intel_th_device *thdev,
+			       struct intel_th_device *othdev)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	int i, id;
+
+	if (thdev->host_mode)
+		return -EBUSY;
+
+	if (othdev->type != INTEL_TH_OUTPUT)
+		return -EINVAL;
+
+	for (i = 0, id = 0; i < TH_POSSIBLE_OUTPUTS; i++) {
+		if (gth->output[i].port_type != othdev->output.type)
+			continue;
+
+		if (othdev->id == -1 || othdev->id == id)
+			goto found;
+
+		id++;
+	}
+
+	return -ENOENT;
+
+found:
+	spin_lock(&gth->gth_lock);
+	othdev->output.port = i;
+	othdev->output.active = false;
+	gth->output[i].output = &othdev->output;
+	spin_unlock(&gth->gth_lock);
+
+	return 0;
+}
+
+/**
+ * intel_th_gth_unassign() - deassociate an output device from its output port
+ * @thdev:	GTH device
+ * @othdev:	output device
+ */
+static void intel_th_gth_unassign(struct intel_th_device *thdev,
+				  struct intel_th_device *othdev)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	int port = othdev->output.port;
+	int master;
+
+	if (thdev->host_mode)
+		return;
+
+	spin_lock(&gth->gth_lock);
+	othdev->output.port = -1;
+	othdev->output.active = false;
+	gth->output[port].output = NULL;
+	for (master = 0; master < TH_CONFIGURABLE_MASTERS + 1; master++)
+		if (gth->master[master] == port)
+			gth->master[master] = -1;
+	spin_unlock(&gth->gth_lock);
+}
+
+static int
+intel_th_gth_set_output(struct intel_th_device *thdev, unsigned int master)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+	int port = 0; /* FIXME: make default output configurable */
+
+	/*
+	 * everything above TH_CONFIGURABLE_MASTERS is controlled by the
+	 * same register
+	 */
+	if (master > TH_CONFIGURABLE_MASTERS)
+		master = TH_CONFIGURABLE_MASTERS;
+
+	spin_lock(&gth->gth_lock);
+	if (gth->master[master] == -1) {
+		set_bit(master, gth->output[port].master);
+		gth->master[master] = port;
+	}
+	spin_unlock(&gth->gth_lock);
+
+	return 0;
+}
+
+static int intel_th_gth_probe(struct intel_th_device *thdev)
+{
+	struct device *dev = &thdev->dev;
+	struct intel_th *th = dev_get_drvdata(dev->parent);
+	struct gth_device *gth;
+	struct resource *res;
+	void __iomem *base;
+	int i, ret;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	gth = devm_kzalloc(dev, sizeof(*gth), GFP_KERNEL);
+	if (!gth)
+		return -ENOMEM;
+
+	gth->dev = dev;
+	gth->base = base;
+	spin_lock_init(&gth->gth_lock);
+
+	dev_set_drvdata(dev, gth);
+
+	/*
+	 * Host mode can be signalled via SW means or via SCRPD_DEBUGGER_IN_USE
+	 * bit. Either way, don't reset HW in this case, and don't export any
+	 * capture configuration attributes. Also, refuse to assign output
+	 * drivers to ports, see intel_th_gth_assign().
+	 */
+	if (thdev->host_mode)
+		return 0;
+
+	ret = intel_th_gth_reset(gth);
+	if (ret) {
+		if (ret != -EBUSY)
+			return ret;
+
+		thdev->host_mode = true;
+
+		return 0;
+	}
+
+	for (i = 0; i < TH_CONFIGURABLE_MASTERS + 1; i++)
+		gth->master[i] = -1;
+
+	for (i = 0; i < TH_POSSIBLE_OUTPUTS; i++) {
+		gth->output[i].gth = gth;
+		gth->output[i].index = i;
+		gth->output[i].port_type =
+			gth_output_parm_get(gth, i, TH_OUTPUT_PARM(port));
+		if (gth->output[i].port_type == GTH_NONE)
+			continue;
+
+		ret = intel_th_output_enable(th, gth->output[i].port_type);
+		/* -ENODEV is ok, we just won't have that device enumerated */
+		if (ret && ret != -ENODEV)
+			return ret;
+	}
+
+	if (intel_th_output_attributes(gth) ||
+	    intel_th_master_attributes(gth)) {
+		pr_warn("Can't initialize sysfs attributes\n");
+
+		if (gth->output_group.attrs)
+			sysfs_remove_group(&gth->dev->kobj, &gth->output_group);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void intel_th_gth_remove(struct intel_th_device *thdev)
+{
+	struct gth_device *gth = dev_get_drvdata(&thdev->dev);
+
+	sysfs_remove_group(&gth->dev->kobj, &gth->output_group);
+	sysfs_remove_group(&gth->dev->kobj, &gth->master_group);
+}
+
+static struct intel_th_driver intel_th_gth_driver = {
+	.probe		= intel_th_gth_probe,
+	.remove		= intel_th_gth_remove,
+	.assign		= intel_th_gth_assign,
+	.unassign	= intel_th_gth_unassign,
+	.set_output	= intel_th_gth_set_output,
+	.prepare	= intel_th_gth_prepare,
+	.enable		= intel_th_gth_enable,
+	.trig_switch	= intel_th_gth_switch,
+	.disable	= intel_th_gth_disable,
+	.driver	= {
+		.name	= "gth",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_driver(intel_th_gth_driver,
+	      intel_th_driver_register,
+	      intel_th_driver_unregister);
+
+MODULE_ALIAS("intel_th_switch");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub Global Trace Hub driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/intel_th/gth.h b/drivers/hwtracing/intel_th/gth.h
new file mode 100644
index 0000000000..bfcc0fd011
--- /dev/null
+++ b/drivers/hwtracing/intel_th/gth.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub Global Trace Hub (GTH) data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_GTH_H__
+#define __INTEL_TH_GTH_H__
+
+/* Map output port parameter bits to symbolic names */
+#define TH_OUTPUT_PARM(name)			\
+	TH_OUTPUT_ ## name
+
+enum intel_th_output_parm {
+	/* output port type */
+	TH_OUTPUT_PARM(port),
+	/* generate NULL packet */
+	TH_OUTPUT_PARM(null),
+	/* packet drop */
+	TH_OUTPUT_PARM(drop),
+	/* port in reset state */
+	TH_OUTPUT_PARM(reset),
+	/* flush out data */
+	TH_OUTPUT_PARM(flush),
+	/* mainenance packet frequency */
+	TH_OUTPUT_PARM(smcfreq),
+};
+
+/*
+ * Register offsets
+ */
+enum {
+	REG_GTH_GTHOPT0		= 0x00, /* Output ports 0..3 config */
+	REG_GTH_GTHOPT1		= 0x04, /* Output ports 4..7 config */
+	REG_GTH_SWDEST0		= 0x08, /* Switching destination masters 0..7 */
+	REG_GTH_GSWTDEST	= 0x88, /* Global sw trace destination */
+	REG_GTH_SMCR0		= 0x9c, /* STP mainenance for ports 0/1 */
+	REG_GTH_SMCR1		= 0xa0, /* STP mainenance for ports 2/3 */
+	REG_GTH_SMCR2		= 0xa4, /* STP mainenance for ports 4/5 */
+	REG_GTH_SMCR3		= 0xa8, /* STP mainenance for ports 6/7 */
+	REG_GTH_SCR		= 0xc8, /* Source control (storeEn override) */
+	REG_GTH_STAT		= 0xd4, /* GTH status */
+	REG_GTH_SCR2		= 0xd8, /* Source control (force storeEn off) */
+	REG_GTH_DESTOVR		= 0xdc, /* Destination override */
+	REG_GTH_SCRPD0		= 0xe0, /* ScratchPad[0] */
+	REG_GTH_SCRPD1		= 0xe4, /* ScratchPad[1] */
+	REG_GTH_SCRPD2		= 0xe8, /* ScratchPad[2] */
+	REG_GTH_SCRPD3		= 0xec, /* ScratchPad[3] */
+	REG_TSCU_TSUCTRL	= 0x2000, /* TSCU control register */
+	REG_TSCU_TSCUSTAT	= 0x2004, /* TSCU status register */
+
+	/* Common Capture Sequencer (CTS) registers */
+	REG_CTS_C0S0_EN		= 0x30c0, /* clause_event_enable_c0s0 */
+	REG_CTS_C0S0_ACT	= 0x3180, /* clause_action_control_c0s0 */
+	REG_CTS_STAT		= 0x32a0, /* cts_status */
+	REG_CTS_CTL		= 0x32a4, /* cts_control */
+};
+
+/* waiting for Pipeline Empty bit(s) to assert for GTH */
+#define GTH_PLE_WAITLOOP_DEPTH	10000
+
+#define TSUCTRL_CTCRESYNC	BIT(0)
+#define TSCUSTAT_CTCSYNCING	BIT(1)
+
+/* waiting for Trigger status to assert for CTS */
+#define CTS_TRIG_WAITLOOP_DEPTH	10000
+
+#define CTS_EVENT_ENABLE_IF_ANYTHING	BIT(31)
+#define CTS_ACTION_CONTROL_STATE_OFF	27
+#define CTS_ACTION_CONTROL_SET_STATE(x)	\
+	(((x) & 0x1f) << CTS_ACTION_CONTROL_STATE_OFF)
+#define CTS_ACTION_CONTROL_TRIGGER	BIT(4)
+
+#define CTS_STATE_IDLE			0x10u
+
+#define CTS_CTL_SEQUENCER_ENABLE	BIT(0)
+
+#endif /* __INTEL_TH_GTH_H__ */
diff --git a/drivers/hwtracing/intel_th/intel_th.h b/drivers/hwtracing/intel_th/intel_th.h
new file mode 100644
index 0000000000..6cbba733f2
--- /dev/null
+++ b/drivers/hwtracing/intel_th/intel_th.h
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_H__
+#define __INTEL_TH_H__
+
+#include <linux/irqreturn.h>
+
+/* intel_th_device device types */
+enum {
+	/* Devices that generate trace data */
+	INTEL_TH_SOURCE = 0,
+	/* Output ports (MSC, PTI) */
+	INTEL_TH_OUTPUT,
+	/* Switch, the Global Trace Hub (GTH) */
+	INTEL_TH_SWITCH,
+};
+
+struct intel_th_device;
+
+/**
+ * struct intel_th_output - descriptor INTEL_TH_OUTPUT type devices
+ * @port:	output port number, assigned by the switch
+ * @type:	GTH_{MSU,CTP,PTI}
+ * @scratchpad:	scratchpad bits to flag when this output is enabled
+ * @multiblock:	true for multiblock output configuration
+ * @active:	true when this output is enabled
+ * @wait_empty:	wait for device pipeline to be empty
+ *
+ * Output port descriptor, used by switch driver to tell which output
+ * port this output device corresponds to. Filled in at output device's
+ * probe time by switch::assign(). Passed from output device driver to
+ * switch related code to enable/disable its port.
+ */
+struct intel_th_output {
+	int		port;
+	unsigned int	type;
+	unsigned int	scratchpad;
+	bool		multiblock;
+	bool		active;
+};
+
+/**
+ * struct intel_th_drvdata - describes hardware capabilities and quirks
+ * @tscu_enable:	device needs SW to enable time stamping unit
+ * @multi_is_broken:	device has multiblock mode is broken
+ * @has_mintctl:	device has interrupt control (MINTCTL) register
+ * @host_mode_only:	device can only operate in 'host debugger' mode
+ */
+struct intel_th_drvdata {
+	unsigned int	tscu_enable        : 1,
+			multi_is_broken    : 1,
+			has_mintctl        : 1,
+			host_mode_only     : 1;
+};
+
+#define INTEL_TH_CAP(_th, _cap) ((_th)->drvdata ? (_th)->drvdata->_cap : 0)
+
+/**
+ * struct intel_th_device - device on the intel_th bus
+ * @dev:		device
+ * @drvdata:		hardware capabilities/quirks
+ * @resource:		array of resources available to this device
+ * @num_resources:	number of resources in @resource array
+ * @type:		INTEL_TH_{SOURCE,OUTPUT,SWITCH}
+ * @id:			device instance or -1
+ * @host_mode:		Intel TH is controlled by an external debug host
+ * @output:		output descriptor for INTEL_TH_OUTPUT devices
+ * @name:		device name to match the driver
+ */
+struct intel_th_device {
+	struct device		dev;
+	const struct intel_th_drvdata *drvdata;
+	struct resource		*resource;
+	unsigned int		num_resources;
+	unsigned int		type;
+	int			id;
+
+	/* INTEL_TH_SWITCH specific */
+	bool			host_mode;
+
+	/* INTEL_TH_OUTPUT specific */
+	struct intel_th_output	output;
+
+	char		name[];
+};
+
+#define to_intel_th_device(_d)				\
+	container_of((_d), struct intel_th_device, dev)
+
+/**
+ * intel_th_device_get_resource() - obtain @num'th resource of type @type
+ * @thdev:	the device to search the resource for
+ * @type:	resource type
+ * @num:	number of the resource
+ */
+static inline struct resource *
+intel_th_device_get_resource(struct intel_th_device *thdev, unsigned int type,
+			     unsigned int num)
+{
+	int i;
+
+	for (i = 0; i < thdev->num_resources; i++)
+		if (resource_type(&thdev->resource[i]) == type && !num--)
+			return &thdev->resource[i];
+
+	return NULL;
+}
+
+/*
+ * GTH, output ports configuration
+ */
+enum {
+	GTH_NONE = 0,
+	GTH_MSU,	/* memory/usb */
+	GTH_CTP,	/* Common Trace Port */
+	GTH_LPP,	/* Low Power Path */
+	GTH_PTI,	/* MIPI-PTI */
+};
+
+/**
+ * intel_th_output_assigned() - if an output device is assigned to a switch port
+ * @thdev:	the output device
+ *
+ * Return:	true if the device is INTEL_TH_OUTPUT *and* is assigned a port
+ */
+static inline bool
+intel_th_output_assigned(struct intel_th_device *thdev)
+{
+	return thdev->type == INTEL_TH_OUTPUT &&
+		(thdev->output.port >= 0 ||
+		 thdev->output.type == GTH_NONE);
+}
+
+/**
+ * struct intel_th_driver - driver for an intel_th_device device
+ * @driver:	generic driver
+ * @probe:	probe method
+ * @remove:	remove method
+ * @assign:	match a given output type device against available outputs
+ * @unassign:	deassociate an output type device from an output port
+ * @prepare:	prepare output port for tracing
+ * @enable:	enable tracing for a given output device
+ * @disable:	disable tracing for a given output device
+ * @irq:	interrupt callback
+ * @activate:	enable tracing on the output's side
+ * @deactivate:	disable tracing on the output's side
+ * @fops:	file operations for device nodes
+ * @attr_group:	attributes provided by the driver
+ *
+ * Callbacks @probe and @remove are required for all device types.
+ * Switch device driver needs to fill in @assign, @enable and @disable
+ * callbacks.
+ */
+struct intel_th_driver {
+	struct device_driver	driver;
+	int			(*probe)(struct intel_th_device *thdev);
+	void			(*remove)(struct intel_th_device *thdev);
+	/* switch (GTH) ops */
+	int			(*assign)(struct intel_th_device *thdev,
+					  struct intel_th_device *othdev);
+	void			(*unassign)(struct intel_th_device *thdev,
+					    struct intel_th_device *othdev);
+	void			(*prepare)(struct intel_th_device *thdev,
+					   struct intel_th_output *output);
+	void			(*enable)(struct intel_th_device *thdev,
+					  struct intel_th_output *output);
+	void			(*trig_switch)(struct intel_th_device *thdev,
+					       struct intel_th_output *output);
+	void			(*disable)(struct intel_th_device *thdev,
+					   struct intel_th_output *output);
+	/* output ops */
+	irqreturn_t		(*irq)(struct intel_th_device *thdev);
+	void			(*wait_empty)(struct intel_th_device *thdev);
+	int			(*activate)(struct intel_th_device *thdev);
+	void			(*deactivate)(struct intel_th_device *thdev);
+	/* file_operations for those who want a device node */
+	const struct file_operations *fops;
+	/* optional attributes */
+	const struct attribute_group *attr_group;
+
+	/* source ops */
+	int			(*set_output)(struct intel_th_device *thdev,
+					      unsigned int master);
+};
+
+#define to_intel_th_driver(_d)					\
+	container_of((_d), struct intel_th_driver, driver)
+
+#define to_intel_th_driver_or_null(_d)		\
+	((_d) ? to_intel_th_driver(_d) : NULL)
+
+/*
+ * Subdevice tree structure is as follows:
+ * + struct intel_th device (pci; dev_{get,set}_drvdata()
+ *   + struct intel_th_device INTEL_TH_SWITCH (GTH)
+ *     + struct intel_th_device INTEL_TH_OUTPUT (MSU, PTI)
+ *   + struct intel_th_device INTEL_TH_SOURCE (STH)
+ *
+ * In other words, INTEL_TH_OUTPUT devices are children of INTEL_TH_SWITCH;
+ * INTEL_TH_SWITCH and INTEL_TH_SOURCE are children of the intel_th device.
+ */
+static inline struct intel_th_device *
+to_intel_th_parent(const struct intel_th_device *thdev)
+{
+	struct device *parent = thdev->dev.parent;
+
+	if (!parent)
+		return NULL;
+
+	return to_intel_th_device(parent);
+}
+
+static inline struct intel_th *to_intel_th(const struct intel_th_device *thdev)
+{
+	if (thdev->type == INTEL_TH_OUTPUT)
+		thdev = to_intel_th_parent(thdev);
+
+	if (WARN_ON_ONCE(!thdev || thdev->type == INTEL_TH_OUTPUT))
+		return NULL;
+
+	return dev_get_drvdata(thdev->dev.parent);
+}
+
+struct intel_th *
+intel_th_alloc(struct device *dev, const struct intel_th_drvdata *drvdata,
+	       struct resource *devres, unsigned int ndevres);
+void intel_th_free(struct intel_th *th);
+
+int intel_th_driver_register(struct intel_th_driver *thdrv);
+void intel_th_driver_unregister(struct intel_th_driver *thdrv);
+
+int intel_th_trace_enable(struct intel_th_device *thdev);
+int intel_th_trace_switch(struct intel_th_device *thdev);
+int intel_th_trace_disable(struct intel_th_device *thdev);
+int intel_th_set_output(struct intel_th_device *thdev,
+			unsigned int master);
+int intel_th_output_enable(struct intel_th *th, unsigned int otype);
+
+enum th_mmio_idx {
+	TH_MMIO_CONFIG = 0,
+	TH_MMIO_SW = 1,
+	TH_MMIO_RTIT = 2,
+	TH_MMIO_END,
+};
+
+#define TH_POSSIBLE_OUTPUTS	8
+/* Total number of possible subdevices: outputs + GTH + STH */
+#define TH_SUBDEVICE_MAX	(TH_POSSIBLE_OUTPUTS + 2)
+#define TH_CONFIGURABLE_MASTERS 256
+#define TH_MSC_MAX		2
+
+/* Maximum IRQ vectors */
+#define TH_NVEC_MAX		8
+
+/**
+ * struct intel_th - Intel TH controller
+ * @dev:	driver core's device
+ * @thdev:	subdevices
+ * @hub:	"switch" subdevice (GTH)
+ * @resource:	resources of the entire controller
+ * @num_thdevs:	number of devices in the @thdev array
+ * @num_resources:	number of resources in the @resource array
+ * @irq:	irq number
+ * @num_irqs:	number of IRQs is use
+ * @id:		this Intel TH controller's device ID in the system
+ * @major:	device node major for output devices
+ */
+struct intel_th {
+	struct device		*dev;
+
+	struct intel_th_device	*thdev[TH_SUBDEVICE_MAX];
+	struct intel_th_device	*hub;
+	const struct intel_th_drvdata	*drvdata;
+
+	struct resource		resource[TH_MMIO_END];
+	int			(*activate)(struct intel_th *);
+	void			(*deactivate)(struct intel_th *);
+	unsigned int		num_thdevs;
+	unsigned int		num_resources;
+	int			irq;
+	int			num_irqs;
+
+	int			id;
+	int			major;
+#ifdef CONFIG_MODULES
+	struct work_struct	request_module_work;
+#endif /* CONFIG_MODULES */
+#ifdef CONFIG_INTEL_TH_DEBUG
+	struct dentry		*dbg;
+#endif
+};
+
+static inline struct intel_th_device *
+to_intel_th_hub(struct intel_th_device *thdev)
+{
+	if (thdev->type == INTEL_TH_SWITCH)
+		return thdev;
+	else if (thdev->type == INTEL_TH_OUTPUT)
+		return to_intel_th_parent(thdev);
+
+	return to_intel_th(thdev)->hub;
+}
+
+/*
+ * Register windows
+ */
+enum {
+	/* Global Trace Hub (GTH) */
+	REG_GTH_OFFSET		= 0x0000,
+	REG_GTH_LENGTH		= 0x2000,
+
+	/* Timestamp counter unit (TSCU) */
+	REG_TSCU_OFFSET		= 0x2000,
+	REG_TSCU_LENGTH		= 0x1000,
+
+	REG_CTS_OFFSET		= 0x3000,
+	REG_CTS_LENGTH		= 0x1000,
+
+	/* Software Trace Hub (STH) [0x4000..0x4fff] */
+	REG_STH_OFFSET		= 0x4000,
+	REG_STH_LENGTH		= 0x2000,
+
+	/* Memory Storage Unit (MSU) [0xa0000..0xa1fff] */
+	REG_MSU_OFFSET		= 0xa0000,
+	REG_MSU_LENGTH		= 0x02000,
+
+	/* Internal MSU trace buffer [0x80000..0x9ffff] */
+	BUF_MSU_OFFSET		= 0x80000,
+	BUF_MSU_LENGTH		= 0x20000,
+
+	/* PTI output == same window as GTH */
+	REG_PTI_OFFSET		= REG_GTH_OFFSET,
+	REG_PTI_LENGTH		= REG_GTH_LENGTH,
+
+	/* DCI Handler (DCIH) == some window as MSU */
+	REG_DCIH_OFFSET		= REG_MSU_OFFSET,
+	REG_DCIH_LENGTH		= REG_MSU_LENGTH,
+};
+
+/*
+ * Scratchpad bits: tell firmware and external debuggers
+ * what we are up to.
+ */
+enum {
+	/* Memory is the primary destination */
+	SCRPD_MEM_IS_PRIM_DEST		= BIT(0),
+	/* XHCI DbC is the primary destination */
+	SCRPD_DBC_IS_PRIM_DEST		= BIT(1),
+	/* PTI is the primary destination */
+	SCRPD_PTI_IS_PRIM_DEST		= BIT(2),
+	/* BSSB is the primary destination */
+	SCRPD_BSSB_IS_PRIM_DEST		= BIT(3),
+	/* PTI is the alternate destination */
+	SCRPD_PTI_IS_ALT_DEST		= BIT(4),
+	/* BSSB is the alternate destination */
+	SCRPD_BSSB_IS_ALT_DEST		= BIT(5),
+	/* DeepSx exit occurred */
+	SCRPD_DEEPSX_EXIT		= BIT(6),
+	/* S4 exit occurred */
+	SCRPD_S4_EXIT			= BIT(7),
+	/* S5 exit occurred */
+	SCRPD_S5_EXIT			= BIT(8),
+	/* MSU controller 0/1 is enabled */
+	SCRPD_MSC0_IS_ENABLED		= BIT(9),
+	SCRPD_MSC1_IS_ENABLED		= BIT(10),
+	/* Sx exit occurred */
+	SCRPD_SX_EXIT			= BIT(11),
+	/* Trigger Unit is enabled */
+	SCRPD_TRIGGER_IS_ENABLED	= BIT(12),
+	SCRPD_ODLA_IS_ENABLED		= BIT(13),
+	SCRPD_SOCHAP_IS_ENABLED		= BIT(14),
+	SCRPD_STH_IS_ENABLED		= BIT(15),
+	SCRPD_DCIH_IS_ENABLED		= BIT(16),
+	SCRPD_VER_IS_ENABLED		= BIT(17),
+	/* External debugger is using Intel TH */
+	SCRPD_DEBUGGER_IN_USE		= BIT(24),
+};
+
+#endif
diff --git a/drivers/hwtracing/intel_th/msu-sink.c b/drivers/hwtracing/intel_th/msu-sink.c
new file mode 100644
index 0000000000..891b28ea25
--- /dev/null
+++ b/drivers/hwtracing/intel_th/msu-sink.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * An example software sink buffer for Intel TH MSU.
+ *
+ * Copyright (C) 2019 Intel Corporation.
+ */
+
+#include <linux/intel_th.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+
+#define MAX_SGTS 16
+
+struct msu_sink_private {
+	struct device	*dev;
+	struct sg_table **sgts;
+	unsigned int	nr_sgts;
+};
+
+static void *msu_sink_assign(struct device *dev, int *mode)
+{
+	struct msu_sink_private *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return NULL;
+
+	priv->sgts = kcalloc(MAX_SGTS, sizeof(void *), GFP_KERNEL);
+	if (!priv->sgts) {
+		kfree(priv);
+		return NULL;
+	}
+
+	priv->dev = dev;
+	*mode = MSC_MODE_MULTI;
+
+	return priv;
+}
+
+static void msu_sink_unassign(void *data)
+{
+	struct msu_sink_private *priv = data;
+
+	kfree(priv->sgts);
+	kfree(priv);
+}
+
+/* See also: msc.c: __msc_buffer_win_alloc() */
+static int msu_sink_alloc_window(void *data, struct sg_table **sgt, size_t size)
+{
+	struct msu_sink_private *priv = data;
+	unsigned int nents;
+	struct scatterlist *sg_ptr;
+	void *block;
+	int ret, i;
+
+	if (priv->nr_sgts == MAX_SGTS)
+		return -ENOMEM;
+
+	nents = DIV_ROUND_UP(size, PAGE_SIZE);
+
+	ret = sg_alloc_table(*sgt, nents, GFP_KERNEL);
+	if (ret)
+		return -ENOMEM;
+
+	priv->sgts[priv->nr_sgts++] = *sgt;
+
+	for_each_sg((*sgt)->sgl, sg_ptr, nents, i) {
+		block = dma_alloc_coherent(priv->dev->parent->parent,
+					   PAGE_SIZE, &sg_dma_address(sg_ptr),
+					   GFP_KERNEL);
+		if (!block)
+			return -ENOMEM;
+
+		sg_set_buf(sg_ptr, block, PAGE_SIZE);
+	}
+
+	return nents;
+}
+
+/* See also: msc.c: __msc_buffer_win_free() */
+static void msu_sink_free_window(void *data, struct sg_table *sgt)
+{
+	struct msu_sink_private *priv = data;
+	struct scatterlist *sg_ptr;
+	int i;
+
+	for_each_sg(sgt->sgl, sg_ptr, sgt->nents, i) {
+		dma_free_coherent(priv->dev->parent->parent, PAGE_SIZE,
+				  sg_virt(sg_ptr), sg_dma_address(sg_ptr));
+	}
+
+	sg_free_table(sgt);
+	priv->nr_sgts--;
+}
+
+static int msu_sink_ready(void *data, struct sg_table *sgt, size_t bytes)
+{
+	struct msu_sink_private *priv = data;
+
+	intel_th_msc_window_unlock(priv->dev, sgt);
+
+	return 0;
+}
+
+static const struct msu_buffer sink_mbuf = {
+	.name		= "sink",
+	.assign		= msu_sink_assign,
+	.unassign	= msu_sink_unassign,
+	.alloc_window	= msu_sink_alloc_window,
+	.free_window	= msu_sink_free_window,
+	.ready		= msu_sink_ready,
+};
+
+module_intel_th_msu_buffer(sink_mbuf);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
new file mode 100644
index 0000000000..9621efe0e9
--- /dev/null
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -0,0 +1,2206 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub Memory Storage Unit
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/uaccess.h>
+#include <linux/sizes.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/workqueue.h>
+#include <linux/dma-mapping.h>
+
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
+
+#include <linux/intel_th.h>
+#include "intel_th.h"
+#include "msu.h"
+
+#define msc_dev(x) (&(x)->thdev->dev)
+
+/*
+ * Lockout state transitions:
+ *   READY -> INUSE -+-> LOCKED -+-> READY -> etc.
+ *                   \-----------/
+ * WIN_READY:	window can be used by HW
+ * WIN_INUSE:	window is in use
+ * WIN_LOCKED:	window is filled up and is being processed by the buffer
+ * handling code
+ *
+ * All state transitions happen automatically, except for the LOCKED->READY,
+ * which needs to be signalled by the buffer code by calling
+ * intel_th_msc_window_unlock().
+ *
+ * When the interrupt handler has to switch to the next window, it checks
+ * whether it's READY, and if it is, it performs the switch and tracing
+ * continues. If it's LOCKED, it stops the trace.
+ */
+enum lockout_state {
+	WIN_READY = 0,
+	WIN_INUSE,
+	WIN_LOCKED
+};
+
+/**
+ * struct msc_window - multiblock mode window descriptor
+ * @entry:	window list linkage (msc::win_list)
+ * @pgoff:	page offset into the buffer that this window starts at
+ * @lockout:	lockout state, see comment below
+ * @lo_lock:	lockout state serialization
+ * @nr_blocks:	number of blocks (pages) in this window
+ * @nr_segs:	number of segments in this window (<= @nr_blocks)
+ * @_sgt:	array of block descriptors
+ * @sgt:	array of block descriptors
+ */
+struct msc_window {
+	struct list_head	entry;
+	unsigned long		pgoff;
+	enum lockout_state	lockout;
+	spinlock_t		lo_lock;
+	unsigned int		nr_blocks;
+	unsigned int		nr_segs;
+	struct msc		*msc;
+	struct sg_table		_sgt;
+	struct sg_table		*sgt;
+};
+
+/**
+ * struct msc_iter - iterator for msc buffer
+ * @entry:		msc::iter_list linkage
+ * @msc:		pointer to the MSC device
+ * @start_win:		oldest window
+ * @win:		current window
+ * @offset:		current logical offset into the buffer
+ * @start_block:	oldest block in the window
+ * @block:		block number in the window
+ * @block_off:		offset into current block
+ * @wrap_count:		block wrapping handling
+ * @eof:		end of buffer reached
+ */
+struct msc_iter {
+	struct list_head	entry;
+	struct msc		*msc;
+	struct msc_window	*start_win;
+	struct msc_window	*win;
+	unsigned long		offset;
+	struct scatterlist	*start_block;
+	struct scatterlist	*block;
+	unsigned int		block_off;
+	unsigned int		wrap_count;
+	unsigned int		eof;
+};
+
+/**
+ * struct msc - MSC device representation
+ * @reg_base:		register window base address
+ * @thdev:		intel_th_device pointer
+ * @mbuf:		MSU buffer, if assigned
+ * @mbuf_priv		MSU buffer's private data, if @mbuf
+ * @win_list:		list of windows in multiblock mode
+ * @single_sgt:		single mode buffer
+ * @cur_win:		current window
+ * @nr_pages:		total number of pages allocated for this buffer
+ * @single_sz:		amount of data in single mode
+ * @single_wrap:	single mode wrap occurred
+ * @base:		buffer's base pointer
+ * @base_addr:		buffer's base address
+ * @user_count:		number of users of the buffer
+ * @mmap_count:		number of mappings
+ * @buf_mutex:		mutex to serialize access to buffer-related bits
+
+ * @enabled:		MSC is enabled
+ * @wrap:		wrapping is enabled
+ * @mode:		MSC operating mode
+ * @burst_len:		write burst length
+ * @index:		number of this MSC in the MSU
+ */
+struct msc {
+	void __iomem		*reg_base;
+	void __iomem		*msu_base;
+	struct intel_th_device	*thdev;
+
+	const struct msu_buffer	*mbuf;
+	void			*mbuf_priv;
+
+	struct work_struct	work;
+	struct list_head	win_list;
+	struct sg_table		single_sgt;
+	struct msc_window	*cur_win;
+	struct msc_window	*switch_on_unlock;
+	unsigned long		nr_pages;
+	unsigned long		single_sz;
+	unsigned int		single_wrap : 1;
+	void			*base;
+	dma_addr_t		base_addr;
+	u32			orig_addr;
+	u32			orig_sz;
+
+	/* <0: no buffer, 0: no users, >0: active users */
+	atomic_t		user_count;
+
+	atomic_t		mmap_count;
+	struct mutex		buf_mutex;
+
+	struct list_head	iter_list;
+
+	bool			stop_on_full;
+
+	/* config */
+	unsigned int		enabled : 1,
+				wrap	: 1,
+				do_irq	: 1,
+				multi_is_broken : 1;
+	unsigned int		mode;
+	unsigned int		burst_len;
+	unsigned int		index;
+};
+
+static LIST_HEAD(msu_buffer_list);
+static DEFINE_MUTEX(msu_buffer_mutex);
+
+/**
+ * struct msu_buffer_entry - internal MSU buffer bookkeeping
+ * @entry:	link to msu_buffer_list
+ * @mbuf:	MSU buffer object
+ * @owner:	module that provides this MSU buffer
+ */
+struct msu_buffer_entry {
+	struct list_head	entry;
+	const struct msu_buffer	*mbuf;
+	struct module		*owner;
+};
+
+static struct msu_buffer_entry *__msu_buffer_entry_find(const char *name)
+{
+	struct msu_buffer_entry *mbe;
+
+	lockdep_assert_held(&msu_buffer_mutex);
+
+	list_for_each_entry(mbe, &msu_buffer_list, entry) {
+		if (!strcmp(mbe->mbuf->name, name))
+			return mbe;
+	}
+
+	return NULL;
+}
+
+static const struct msu_buffer *
+msu_buffer_get(const char *name)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(name);
+	if (mbe && !try_module_get(mbe->owner))
+		mbe = NULL;
+	mutex_unlock(&msu_buffer_mutex);
+
+	return mbe ? mbe->mbuf : NULL;
+}
+
+static void msu_buffer_put(const struct msu_buffer *mbuf)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(mbuf->name);
+	if (mbe)
+		module_put(mbe->owner);
+	mutex_unlock(&msu_buffer_mutex);
+}
+
+int intel_th_msu_buffer_register(const struct msu_buffer *mbuf,
+				 struct module *owner)
+{
+	struct msu_buffer_entry *mbe;
+	int ret = 0;
+
+	mbe = kzalloc(sizeof(*mbe), GFP_KERNEL);
+	if (!mbe)
+		return -ENOMEM;
+
+	mutex_lock(&msu_buffer_mutex);
+	if (__msu_buffer_entry_find(mbuf->name)) {
+		ret = -EEXIST;
+		kfree(mbe);
+		goto unlock;
+	}
+
+	mbe->mbuf = mbuf;
+	mbe->owner = owner;
+	list_add_tail(&mbe->entry, &msu_buffer_list);
+unlock:
+	mutex_unlock(&msu_buffer_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(intel_th_msu_buffer_register);
+
+void intel_th_msu_buffer_unregister(const struct msu_buffer *mbuf)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(mbuf->name);
+	if (mbe) {
+		list_del(&mbe->entry);
+		kfree(mbe);
+	}
+	mutex_unlock(&msu_buffer_mutex);
+}
+EXPORT_SYMBOL_GPL(intel_th_msu_buffer_unregister);
+
+static inline bool msc_block_is_empty(struct msc_block_desc *bdesc)
+{
+	/* header hasn't been written */
+	if (!bdesc->valid_dw)
+		return true;
+
+	/* valid_dw includes the header */
+	if (!msc_data_sz(bdesc))
+		return true;
+
+	return false;
+}
+
+static inline struct scatterlist *msc_win_base_sg(struct msc_window *win)
+{
+	return win->sgt->sgl;
+}
+
+static inline struct msc_block_desc *msc_win_base(struct msc_window *win)
+{
+	return sg_virt(msc_win_base_sg(win));
+}
+
+static inline dma_addr_t msc_win_base_dma(struct msc_window *win)
+{
+	return sg_dma_address(msc_win_base_sg(win));
+}
+
+static inline unsigned long
+msc_win_base_pfn(struct msc_window *win)
+{
+	return PFN_DOWN(msc_win_base_dma(win));
+}
+
+/**
+ * msc_is_last_win() - check if a window is the last one for a given MSC
+ * @win:	window
+ * Return:	true if @win is the last window in MSC's multiblock buffer
+ */
+static inline bool msc_is_last_win(struct msc_window *win)
+{
+	return win->entry.next == &win->msc->win_list;
+}
+
+/**
+ * msc_next_window() - return next window in the multiblock buffer
+ * @win:	current window
+ *
+ * Return:	window following the current one
+ */
+static struct msc_window *msc_next_window(struct msc_window *win)
+{
+	if (msc_is_last_win(win))
+		return list_first_entry(&win->msc->win_list, struct msc_window,
+					entry);
+
+	return list_next_entry(win, entry);
+}
+
+static size_t msc_win_total_sz(struct msc_window *win)
+{
+	struct scatterlist *sg;
+	unsigned int blk;
+	size_t size = 0;
+
+	for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+		struct msc_block_desc *bdesc = sg_virt(sg);
+
+		if (msc_block_wrapped(bdesc))
+			return (size_t)win->nr_blocks << PAGE_SHIFT;
+
+		size += msc_total_sz(bdesc);
+		if (msc_block_last_written(bdesc))
+			break;
+	}
+
+	return size;
+}
+
+/**
+ * msc_find_window() - find a window matching a given sg_table
+ * @msc:	MSC device
+ * @sgt:	SG table of the window
+ * @nonempty:	skip over empty windows
+ *
+ * Return:	MSC window structure pointer or NULL if the window
+ *		could not be found.
+ */
+static struct msc_window *
+msc_find_window(struct msc *msc, struct sg_table *sgt, bool nonempty)
+{
+	struct msc_window *win;
+	unsigned int found = 0;
+
+	if (list_empty(&msc->win_list))
+		return NULL;
+
+	/*
+	 * we might need a radix tree for this, depending on how
+	 * many windows a typical user would allocate; ideally it's
+	 * something like 2, in which case we're good
+	 */
+	list_for_each_entry(win, &msc->win_list, entry) {
+		if (win->sgt == sgt)
+			found++;
+
+		/* skip the empty ones */
+		if (nonempty && msc_block_is_empty(msc_win_base(win)))
+			continue;
+
+		if (found)
+			return win;
+	}
+
+	return NULL;
+}
+
+/**
+ * msc_oldest_window() - locate the window with oldest data
+ * @msc:	MSC device
+ *
+ * This should only be used in multiblock mode. Caller should hold the
+ * msc::user_count reference.
+ *
+ * Return:	the oldest window with valid data
+ */
+static struct msc_window *msc_oldest_window(struct msc *msc)
+{
+	struct msc_window *win;
+
+	if (list_empty(&msc->win_list))
+		return NULL;
+
+	win = msc_find_window(msc, msc_next_window(msc->cur_win)->sgt, true);
+	if (win)
+		return win;
+
+	return list_first_entry(&msc->win_list, struct msc_window, entry);
+}
+
+/**
+ * msc_win_oldest_sg() - locate the oldest block in a given window
+ * @win:	window to look at
+ *
+ * Return:	index of the block with the oldest data
+ */
+static struct scatterlist *msc_win_oldest_sg(struct msc_window *win)
+{
+	unsigned int blk;
+	struct scatterlist *sg;
+	struct msc_block_desc *bdesc = msc_win_base(win);
+
+	/* without wrapping, first block is the oldest */
+	if (!msc_block_wrapped(bdesc))
+		return msc_win_base_sg(win);
+
+	/*
+	 * with wrapping, last written block contains both the newest and the
+	 * oldest data for this window.
+	 */
+	for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+		struct msc_block_desc *bdesc = sg_virt(sg);
+
+		if (msc_block_last_written(bdesc))
+			return sg;
+	}
+
+	return msc_win_base_sg(win);
+}
+
+static struct msc_block_desc *msc_iter_bdesc(struct msc_iter *iter)
+{
+	return sg_virt(iter->block);
+}
+
+static struct msc_iter *msc_iter_install(struct msc *msc)
+{
+	struct msc_iter *iter;
+
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&msc->buf_mutex);
+
+	/*
+	 * Reading and tracing are mutually exclusive; if msc is
+	 * enabled, open() will fail; otherwise existing readers
+	 * will prevent enabling the msc and the rest of fops don't
+	 * need to worry about it.
+	 */
+	if (msc->enabled) {
+		kfree(iter);
+		iter = ERR_PTR(-EBUSY);
+		goto unlock;
+	}
+
+	iter->msc = msc;
+
+	list_add_tail(&iter->entry, &msc->iter_list);
+unlock:
+	mutex_unlock(&msc->buf_mutex);
+
+	return iter;
+}
+
+static void msc_iter_remove(struct msc_iter *iter, struct msc *msc)
+{
+	mutex_lock(&msc->buf_mutex);
+	list_del(&iter->entry);
+	mutex_unlock(&msc->buf_mutex);
+
+	kfree(iter);
+}
+
+static void msc_iter_block_start(struct msc_iter *iter)
+{
+	if (iter->start_block)
+		return;
+
+	iter->start_block = msc_win_oldest_sg(iter->win);
+	iter->block = iter->start_block;
+	iter->wrap_count = 0;
+
+	/*
+	 * start with the block with oldest data; if data has wrapped
+	 * in this window, it should be in this block
+	 */
+	if (msc_block_wrapped(msc_iter_bdesc(iter)))
+		iter->wrap_count = 2;
+
+}
+
+static int msc_iter_win_start(struct msc_iter *iter, struct msc *msc)
+{
+	/* already started, nothing to do */
+	if (iter->start_win)
+		return 0;
+
+	iter->start_win = msc_oldest_window(msc);
+	if (!iter->start_win)
+		return -EINVAL;
+
+	iter->win = iter->start_win;
+	iter->start_block = NULL;
+
+	msc_iter_block_start(iter);
+
+	return 0;
+}
+
+static int msc_iter_win_advance(struct msc_iter *iter)
+{
+	iter->win = msc_next_window(iter->win);
+	iter->start_block = NULL;
+
+	if (iter->win == iter->start_win) {
+		iter->eof++;
+		return 1;
+	}
+
+	msc_iter_block_start(iter);
+
+	return 0;
+}
+
+static int msc_iter_block_advance(struct msc_iter *iter)
+{
+	iter->block_off = 0;
+
+	/* wrapping */
+	if (iter->wrap_count && iter->block == iter->start_block) {
+		iter->wrap_count--;
+		if (!iter->wrap_count)
+			/* copied newest data from the wrapped block */
+			return msc_iter_win_advance(iter);
+	}
+
+	/* no wrapping, check for last written block */
+	if (!iter->wrap_count && msc_block_last_written(msc_iter_bdesc(iter)))
+		/* copied newest data for the window */
+		return msc_iter_win_advance(iter);
+
+	/* block advance */
+	if (sg_is_last(iter->block))
+		iter->block = msc_win_base_sg(iter->win);
+	else
+		iter->block = sg_next(iter->block);
+
+	/* no wrapping, sanity check in case there is no last written block */
+	if (!iter->wrap_count && iter->block == iter->start_block)
+		return msc_iter_win_advance(iter);
+
+	return 0;
+}
+
+/**
+ * msc_buffer_iterate() - go through multiblock buffer's data
+ * @iter:	iterator structure
+ * @size:	amount of data to scan
+ * @data:	callback's private data
+ * @fn:		iterator callback
+ *
+ * This will start at the window which will be written to next (containing
+ * the oldest data) and work its way to the current window, calling @fn
+ * for each chunk of data as it goes.
+ *
+ * Caller should have msc::user_count reference to make sure the buffer
+ * doesn't disappear from under us.
+ *
+ * Return:	amount of data actually scanned.
+ */
+static ssize_t
+msc_buffer_iterate(struct msc_iter *iter, size_t size, void *data,
+		   unsigned long (*fn)(void *, void *, size_t))
+{
+	struct msc *msc = iter->msc;
+	size_t len = size;
+	unsigned int advance;
+
+	if (iter->eof)
+		return 0;
+
+	/* start with the oldest window */
+	if (msc_iter_win_start(iter, msc))
+		return 0;
+
+	do {
+		unsigned long data_bytes = msc_data_sz(msc_iter_bdesc(iter));
+		void *src = (void *)msc_iter_bdesc(iter) + MSC_BDESC;
+		size_t tocopy = data_bytes, copied = 0;
+		size_t remaining = 0;
+
+		advance = 1;
+
+		/*
+		 * If block wrapping happened, we need to visit the last block
+		 * twice, because it contains both the oldest and the newest
+		 * data in this window.
+		 *
+		 * First time (wrap_count==2), in the very beginning, to collect
+		 * the oldest data, which is in the range
+		 * (data_bytes..DATA_IN_PAGE).
+		 *
+		 * Second time (wrap_count==1), it's just like any other block,
+		 * containing data in the range of [MSC_BDESC..data_bytes].
+		 */
+		if (iter->block == iter->start_block && iter->wrap_count == 2) {
+			tocopy = DATA_IN_PAGE - data_bytes;
+			src += data_bytes;
+		}
+
+		if (!tocopy)
+			goto next_block;
+
+		tocopy -= iter->block_off;
+		src += iter->block_off;
+
+		if (len < tocopy) {
+			tocopy = len;
+			advance = 0;
+		}
+
+		remaining = fn(data, src, tocopy);
+
+		if (remaining)
+			advance = 0;
+
+		copied = tocopy - remaining;
+		len -= copied;
+		iter->block_off += copied;
+		iter->offset += copied;
+
+		if (!advance)
+			break;
+
+next_block:
+		if (msc_iter_block_advance(iter))
+			break;
+
+	} while (len);
+
+	return size - len;
+}
+
+/**
+ * msc_buffer_clear_hw_header() - clear hw header for multiblock
+ * @msc:	MSC device
+ */
+static void msc_buffer_clear_hw_header(struct msc *msc)
+{
+	struct msc_window *win;
+	struct scatterlist *sg;
+
+	list_for_each_entry(win, &msc->win_list, entry) {
+		unsigned int blk;
+
+		for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+			struct msc_block_desc *bdesc = sg_virt(sg);
+
+			memset_startat(bdesc, 0, hw_tag);
+		}
+	}
+}
+
+static int intel_th_msu_init(struct msc *msc)
+{
+	u32 mintctl, msusts;
+
+	if (!msc->do_irq)
+		return 0;
+
+	if (!msc->mbuf)
+		return 0;
+
+	mintctl = ioread32(msc->msu_base + REG_MSU_MINTCTL);
+	mintctl |= msc->index ? M1BLIE : M0BLIE;
+	iowrite32(mintctl, msc->msu_base + REG_MSU_MINTCTL);
+	if (mintctl != ioread32(msc->msu_base + REG_MSU_MINTCTL)) {
+		dev_info(msc_dev(msc), "MINTCTL ignores writes: no usable interrupts\n");
+		msc->do_irq = 0;
+		return 0;
+	}
+
+	msusts = ioread32(msc->msu_base + REG_MSU_MSUSTS);
+	iowrite32(msusts, msc->msu_base + REG_MSU_MSUSTS);
+
+	return 0;
+}
+
+static void intel_th_msu_deinit(struct msc *msc)
+{
+	u32 mintctl;
+
+	if (!msc->do_irq)
+		return;
+
+	mintctl = ioread32(msc->msu_base + REG_MSU_MINTCTL);
+	mintctl &= msc->index ? ~M1BLIE : ~M0BLIE;
+	iowrite32(mintctl, msc->msu_base + REG_MSU_MINTCTL);
+}
+
+static int msc_win_set_lockout(struct msc_window *win,
+			       enum lockout_state expect,
+			       enum lockout_state new)
+{
+	enum lockout_state old;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!win->msc->mbuf)
+		return 0;
+
+	spin_lock_irqsave(&win->lo_lock, flags);
+	old = win->lockout;
+
+	if (old != expect) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	win->lockout = new;
+
+	if (old == expect && new == WIN_LOCKED)
+		atomic_inc(&win->msc->user_count);
+	else if (old == expect && old == WIN_LOCKED)
+		atomic_dec(&win->msc->user_count);
+
+unlock:
+	spin_unlock_irqrestore(&win->lo_lock, flags);
+
+	if (ret) {
+		if (expect == WIN_READY && old == WIN_LOCKED)
+			return -EBUSY;
+
+		/* from intel_th_msc_window_unlock(), don't warn if not locked */
+		if (expect == WIN_LOCKED && old == new)
+			return 0;
+
+		dev_warn_ratelimited(msc_dev(win->msc),
+				     "expected lockout state %d, got %d\n",
+				     expect, old);
+	}
+
+	return ret;
+}
+/**
+ * msc_configure() - set up MSC hardware
+ * @msc:	the MSC device to configure
+ *
+ * Program storage mode, wrapping, burst length and trace buffer address
+ * into a given MSC. Then, enable tracing and set msc::enabled.
+ * The latter is serialized on msc::buf_mutex, so make sure to hold it.
+ */
+static int msc_configure(struct msc *msc)
+{
+	u32 reg;
+
+	lockdep_assert_held(&msc->buf_mutex);
+
+	if (msc->mode > MSC_MODE_MULTI)
+		return -EINVAL;
+
+	if (msc->mode == MSC_MODE_MULTI) {
+		if (msc_win_set_lockout(msc->cur_win, WIN_READY, WIN_INUSE))
+			return -EBUSY;
+
+		msc_buffer_clear_hw_header(msc);
+	}
+
+	msc->orig_addr = ioread32(msc->reg_base + REG_MSU_MSC0BAR);
+	msc->orig_sz   = ioread32(msc->reg_base + REG_MSU_MSC0SIZE);
+
+	reg = msc->base_addr >> PAGE_SHIFT;
+	iowrite32(reg, msc->reg_base + REG_MSU_MSC0BAR);
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		reg = msc->nr_pages;
+		iowrite32(reg, msc->reg_base + REG_MSU_MSC0SIZE);
+	}
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
+	reg &= ~(MSC_MODE | MSC_WRAPEN | MSC_EN | MSC_RD_HDR_OVRD);
+
+	reg |= MSC_EN;
+	reg |= msc->mode << __ffs(MSC_MODE);
+	reg |= msc->burst_len << __ffs(MSC_LEN);
+
+	if (msc->wrap)
+		reg |= MSC_WRAPEN;
+
+	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
+
+	intel_th_msu_init(msc);
+
+	msc->thdev->output.multiblock = msc->mode == MSC_MODE_MULTI;
+	intel_th_trace_enable(msc->thdev);
+	msc->enabled = 1;
+
+	if (msc->mbuf && msc->mbuf->activate)
+		msc->mbuf->activate(msc->mbuf_priv);
+
+	return 0;
+}
+
+/**
+ * msc_disable() - disable MSC hardware
+ * @msc:	MSC device to disable
+ *
+ * If @msc is enabled, disable tracing on the switch and then disable MSC
+ * storage. Caller must hold msc::buf_mutex.
+ */
+static void msc_disable(struct msc *msc)
+{
+	struct msc_window *win = msc->cur_win;
+	u32 reg;
+
+	lockdep_assert_held(&msc->buf_mutex);
+
+	if (msc->mode == MSC_MODE_MULTI)
+		msc_win_set_lockout(win, WIN_INUSE, WIN_LOCKED);
+
+	if (msc->mbuf && msc->mbuf->deactivate)
+		msc->mbuf->deactivate(msc->mbuf_priv);
+	intel_th_msu_deinit(msc);
+	intel_th_trace_disable(msc->thdev);
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
+		msc->single_wrap = !!(reg & MSCSTS_WRAPSTAT);
+
+		reg = ioread32(msc->reg_base + REG_MSU_MSC0MWP);
+		msc->single_sz = reg & ((msc->nr_pages << PAGE_SHIFT) - 1);
+		dev_dbg(msc_dev(msc), "MSCnMWP: %08x/%08lx, wrap: %d\n",
+			reg, msc->single_sz, msc->single_wrap);
+	}
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
+	reg &= ~MSC_EN;
+	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
+
+	if (msc->mbuf && msc->mbuf->ready)
+		msc->mbuf->ready(msc->mbuf_priv, win->sgt,
+				 msc_win_total_sz(win));
+
+	msc->enabled = 0;
+
+	iowrite32(msc->orig_addr, msc->reg_base + REG_MSU_MSC0BAR);
+	iowrite32(msc->orig_sz, msc->reg_base + REG_MSU_MSC0SIZE);
+
+	dev_dbg(msc_dev(msc), "MSCnNWSA: %08x\n",
+		ioread32(msc->reg_base + REG_MSU_MSC0NWSA));
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
+	dev_dbg(msc_dev(msc), "MSCnSTS: %08x\n", reg);
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSUSTS);
+	reg &= msc->index ? MSUSTS_MSC1BLAST : MSUSTS_MSC0BLAST;
+	iowrite32(reg, msc->reg_base + REG_MSU_MSUSTS);
+}
+
+static int intel_th_msc_activate(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	int ret = -EBUSY;
+
+	if (!atomic_inc_unless_negative(&msc->user_count))
+		return -ENODEV;
+
+	mutex_lock(&msc->buf_mutex);
+
+	/* if there are readers, refuse */
+	if (list_empty(&msc->iter_list))
+		ret = msc_configure(msc);
+
+	mutex_unlock(&msc->buf_mutex);
+
+	if (ret)
+		atomic_dec(&msc->user_count);
+
+	return ret;
+}
+
+static void intel_th_msc_deactivate(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+
+	mutex_lock(&msc->buf_mutex);
+	if (msc->enabled) {
+		msc_disable(msc);
+		atomic_dec(&msc->user_count);
+	}
+	mutex_unlock(&msc->buf_mutex);
+}
+
+/**
+ * msc_buffer_contig_alloc() - allocate a contiguous buffer for SINGLE mode
+ * @msc:	MSC device
+ * @size:	allocation size in bytes
+ *
+ * This modifies msc::base, which requires msc::buf_mutex to serialize, so the
+ * caller is expected to hold it.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+static int msc_buffer_contig_alloc(struct msc *msc, unsigned long size)
+{
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned int order = get_order(size);
+	struct page *page;
+	int ret;
+
+	if (!size)
+		return 0;
+
+	ret = sg_alloc_table(&msc->single_sgt, 1, GFP_KERNEL);
+	if (ret)
+		goto err_out;
+
+	ret = -ENOMEM;
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, order);
+	if (!page)
+		goto err_free_sgt;
+
+	split_page(page, order);
+	sg_set_buf(msc->single_sgt.sgl, page_address(page), size);
+
+	ret = dma_map_sg(msc_dev(msc)->parent->parent, msc->single_sgt.sgl, 1,
+			 DMA_FROM_DEVICE);
+	if (ret < 0)
+		goto err_free_pages;
+
+	msc->nr_pages = nr_pages;
+	msc->base = page_address(page);
+	msc->base_addr = sg_dma_address(msc->single_sgt.sgl);
+
+	return 0;
+
+err_free_pages:
+	__free_pages(page, order);
+
+err_free_sgt:
+	sg_free_table(&msc->single_sgt);
+
+err_out:
+	return ret;
+}
+
+/**
+ * msc_buffer_contig_free() - free a contiguous buffer
+ * @msc:	MSC configured in SINGLE mode
+ */
+static void msc_buffer_contig_free(struct msc *msc)
+{
+	unsigned long off;
+
+	dma_unmap_sg(msc_dev(msc)->parent->parent, msc->single_sgt.sgl,
+		     1, DMA_FROM_DEVICE);
+	sg_free_table(&msc->single_sgt);
+
+	for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) {
+		struct page *page = virt_to_page(msc->base + off);
+
+		page->mapping = NULL;
+		__free_page(page);
+	}
+
+	msc->nr_pages = 0;
+}
+
+/**
+ * msc_buffer_contig_get_page() - find a page at a given offset
+ * @msc:	MSC configured in SINGLE mode
+ * @pgoff:	page offset
+ *
+ * Return:	page, if @pgoff is within the range, NULL otherwise.
+ */
+static struct page *msc_buffer_contig_get_page(struct msc *msc,
+					       unsigned long pgoff)
+{
+	if (pgoff >= msc->nr_pages)
+		return NULL;
+
+	return virt_to_page(msc->base + (pgoff << PAGE_SHIFT));
+}
+
+static int __msc_buffer_win_alloc(struct msc_window *win,
+				  unsigned int nr_segs)
+{
+	struct scatterlist *sg_ptr;
+	void *block;
+	int i, ret;
+
+	ret = sg_alloc_table(win->sgt, nr_segs, GFP_KERNEL);
+	if (ret)
+		return -ENOMEM;
+
+	for_each_sg(win->sgt->sgl, sg_ptr, nr_segs, i) {
+		block = dma_alloc_coherent(msc_dev(win->msc)->parent->parent,
+					  PAGE_SIZE, &sg_dma_address(sg_ptr),
+					  GFP_KERNEL);
+		if (!block)
+			goto err_nomem;
+
+		sg_set_buf(sg_ptr, block, PAGE_SIZE);
+	}
+
+	return nr_segs;
+
+err_nomem:
+	for_each_sg(win->sgt->sgl, sg_ptr, i, ret)
+		dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
+				  sg_virt(sg_ptr), sg_dma_address(sg_ptr));
+
+	sg_free_table(win->sgt);
+
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_X86
+static void msc_buffer_set_uc(struct msc *msc)
+{
+	struct scatterlist *sg_ptr;
+	struct msc_window *win;
+	int i;
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		set_memory_uc((unsigned long)msc->base, msc->nr_pages);
+		return;
+	}
+
+	list_for_each_entry(win, &msc->win_list, entry) {
+		for_each_sg(win->sgt->sgl, sg_ptr, win->nr_segs, i) {
+			/* Set the page as uncached */
+			set_memory_uc((unsigned long)sg_virt(sg_ptr),
+					PFN_DOWN(sg_ptr->length));
+		}
+	}
+}
+
+static void msc_buffer_set_wb(struct msc *msc)
+{
+	struct scatterlist *sg_ptr;
+	struct msc_window *win;
+	int i;
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		set_memory_wb((unsigned long)msc->base, msc->nr_pages);
+		return;
+	}
+
+	list_for_each_entry(win, &msc->win_list, entry) {
+		for_each_sg(win->sgt->sgl, sg_ptr, win->nr_segs, i) {
+			/* Reset the page to write-back */
+			set_memory_wb((unsigned long)sg_virt(sg_ptr),
+					PFN_DOWN(sg_ptr->length));
+		}
+	}
+}
+#else /* !X86 */
+static inline void
+msc_buffer_set_uc(struct msc *msc) {}
+static inline void msc_buffer_set_wb(struct msc *msc) {}
+#endif /* CONFIG_X86 */
+
+static struct page *msc_sg_page(struct scatterlist *sg)
+{
+	void *addr = sg_virt(sg);
+
+	if (is_vmalloc_addr(addr))
+		return vmalloc_to_page(addr);
+
+	return sg_page(sg);
+}
+
+/**
+ * msc_buffer_win_alloc() - alloc a window for a multiblock mode
+ * @msc:	MSC device
+ * @nr_blocks:	number of pages in this window
+ *
+ * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
+ * to serialize, so the caller is expected to hold it.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+static int msc_buffer_win_alloc(struct msc *msc, unsigned int nr_blocks)
+{
+	struct msc_window *win;
+	int ret = -ENOMEM;
+
+	if (!nr_blocks)
+		return 0;
+
+	win = kzalloc(sizeof(*win), GFP_KERNEL);
+	if (!win)
+		return -ENOMEM;
+
+	win->msc = msc;
+	win->sgt = &win->_sgt;
+	win->lockout = WIN_READY;
+	spin_lock_init(&win->lo_lock);
+
+	if (!list_empty(&msc->win_list)) {
+		struct msc_window *prev = list_last_entry(&msc->win_list,
+							  struct msc_window,
+							  entry);
+
+		win->pgoff = prev->pgoff + prev->nr_blocks;
+	}
+
+	if (msc->mbuf && msc->mbuf->alloc_window)
+		ret = msc->mbuf->alloc_window(msc->mbuf_priv, &win->sgt,
+					      nr_blocks << PAGE_SHIFT);
+	else
+		ret = __msc_buffer_win_alloc(win, nr_blocks);
+
+	if (ret <= 0)
+		goto err_nomem;
+
+	win->nr_segs = ret;
+	win->nr_blocks = nr_blocks;
+
+	if (list_empty(&msc->win_list)) {
+		msc->base = msc_win_base(win);
+		msc->base_addr = msc_win_base_dma(win);
+		msc->cur_win = win;
+	}
+
+	list_add_tail(&win->entry, &msc->win_list);
+	msc->nr_pages += nr_blocks;
+
+	return 0;
+
+err_nomem:
+	kfree(win);
+
+	return ret;
+}
+
+static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) {
+		struct page *page = msc_sg_page(sg);
+
+		page->mapping = NULL;
+		dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE,
+				  sg_virt(sg), sg_dma_address(sg));
+	}
+	sg_free_table(win->sgt);
+}
+
+/**
+ * msc_buffer_win_free() - free a window from MSC's window list
+ * @msc:	MSC device
+ * @win:	window to free
+ *
+ * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
+ * to serialize, so the caller is expected to hold it.
+ */
+static void msc_buffer_win_free(struct msc *msc, struct msc_window *win)
+{
+	msc->nr_pages -= win->nr_blocks;
+
+	list_del(&win->entry);
+	if (list_empty(&msc->win_list)) {
+		msc->base = NULL;
+		msc->base_addr = 0;
+	}
+
+	if (msc->mbuf && msc->mbuf->free_window)
+		msc->mbuf->free_window(msc->mbuf_priv, win->sgt);
+	else
+		__msc_buffer_win_free(msc, win);
+
+	kfree(win);
+}
+
+/**
+ * msc_buffer_relink() - set up block descriptors for multiblock mode
+ * @msc:	MSC device
+ *
+ * This traverses msc::win_list, which requires msc::buf_mutex to serialize,
+ * so the caller is expected to hold it.
+ */
+static void msc_buffer_relink(struct msc *msc)
+{
+	struct msc_window *win, *next_win;
+
+	/* call with msc::mutex locked */
+	list_for_each_entry(win, &msc->win_list, entry) {
+		struct scatterlist *sg;
+		unsigned int blk;
+		u32 sw_tag = 0;
+
+		/*
+		 * Last window's next_win should point to the first window
+		 * and MSC_SW_TAG_LASTWIN should be set.
+		 */
+		if (msc_is_last_win(win)) {
+			sw_tag |= MSC_SW_TAG_LASTWIN;
+			next_win = list_first_entry(&msc->win_list,
+						    struct msc_window, entry);
+		} else {
+			next_win = list_next_entry(win, entry);
+		}
+
+		for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+			struct msc_block_desc *bdesc = sg_virt(sg);
+
+			memset(bdesc, 0, sizeof(*bdesc));
+
+			bdesc->next_win = msc_win_base_pfn(next_win);
+
+			/*
+			 * Similarly to last window, last block should point
+			 * to the first one.
+			 */
+			if (blk == win->nr_segs - 1) {
+				sw_tag |= MSC_SW_TAG_LASTBLK;
+				bdesc->next_blk = msc_win_base_pfn(win);
+			} else {
+				dma_addr_t addr = sg_dma_address(sg_next(sg));
+
+				bdesc->next_blk = PFN_DOWN(addr);
+			}
+
+			bdesc->sw_tag = sw_tag;
+			bdesc->block_sz = sg->length / 64;
+		}
+	}
+
+	/*
+	 * Make the above writes globally visible before tracing is
+	 * enabled to make sure hardware sees them coherently.
+	 */
+	wmb();
+}
+
+static void msc_buffer_multi_free(struct msc *msc)
+{
+	struct msc_window *win, *iter;
+
+	list_for_each_entry_safe(win, iter, &msc->win_list, entry)
+		msc_buffer_win_free(msc, win);
+}
+
+static int msc_buffer_multi_alloc(struct msc *msc, unsigned long *nr_pages,
+				  unsigned int nr_wins)
+{
+	int ret, i;
+
+	for (i = 0; i < nr_wins; i++) {
+		ret = msc_buffer_win_alloc(msc, nr_pages[i]);
+		if (ret) {
+			msc_buffer_multi_free(msc);
+			return ret;
+		}
+	}
+
+	msc_buffer_relink(msc);
+
+	return 0;
+}
+
+/**
+ * msc_buffer_free() - free buffers for MSC
+ * @msc:	MSC device
+ *
+ * Free MSC's storage buffers.
+ *
+ * This modifies msc::win_list and msc::base, which requires msc::buf_mutex to
+ * serialize, so the caller is expected to hold it.
+ */
+static void msc_buffer_free(struct msc *msc)
+{
+	msc_buffer_set_wb(msc);
+
+	if (msc->mode == MSC_MODE_SINGLE)
+		msc_buffer_contig_free(msc);
+	else if (msc->mode == MSC_MODE_MULTI)
+		msc_buffer_multi_free(msc);
+}
+
+/**
+ * msc_buffer_alloc() - allocate a buffer for MSC
+ * @msc:	MSC device
+ * @size:	allocation size in bytes
+ *
+ * Allocate a storage buffer for MSC, depending on the msc::mode, it will be
+ * either done via msc_buffer_contig_alloc() for SINGLE operation mode or
+ * msc_buffer_win_alloc() for multiblock operation. The latter allocates one
+ * window per invocation, so in multiblock mode this can be called multiple
+ * times for the same MSC to allocate multiple windows.
+ *
+ * This modifies msc::win_list and msc::base, which requires msc::buf_mutex
+ * to serialize, so the caller is expected to hold it.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages,
+			    unsigned int nr_wins)
+{
+	int ret;
+
+	/* -1: buffer not allocated */
+	if (atomic_read(&msc->user_count) != -1)
+		return -EBUSY;
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		if (nr_wins != 1)
+			return -EINVAL;
+
+		ret = msc_buffer_contig_alloc(msc, nr_pages[0] << PAGE_SHIFT);
+	} else if (msc->mode == MSC_MODE_MULTI) {
+		ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins);
+	} else {
+		ret = -EINVAL;
+	}
+
+	if (!ret) {
+		msc_buffer_set_uc(msc);
+
+		/* allocation should be visible before the counter goes to 0 */
+		smp_mb__before_atomic();
+
+		if (WARN_ON_ONCE(atomic_cmpxchg(&msc->user_count, -1, 0) != -1))
+			return -EINVAL;
+	}
+
+	return ret;
+}
+
+/**
+ * msc_buffer_unlocked_free_unless_used() - free a buffer unless it's in use
+ * @msc:	MSC device
+ *
+ * This will free MSC buffer unless it is in use or there is no allocated
+ * buffer.
+ * Caller needs to hold msc::buf_mutex.
+ *
+ * Return:	0 on successful deallocation or if there was no buffer to
+ *		deallocate, -EBUSY if there are active users.
+ */
+static int msc_buffer_unlocked_free_unless_used(struct msc *msc)
+{
+	int count, ret = 0;
+
+	count = atomic_cmpxchg(&msc->user_count, 0, -1);
+
+	/* > 0: buffer is allocated and has users */
+	if (count > 0)
+		ret = -EBUSY;
+	/* 0: buffer is allocated, no users */
+	else if (!count)
+		msc_buffer_free(msc);
+	/* < 0: no buffer, nothing to do */
+
+	return ret;
+}
+
+/**
+ * msc_buffer_free_unless_used() - free a buffer unless it's in use
+ * @msc:	MSC device
+ *
+ * This is a locked version of msc_buffer_unlocked_free_unless_used().
+ */
+static int msc_buffer_free_unless_used(struct msc *msc)
+{
+	int ret;
+
+	mutex_lock(&msc->buf_mutex);
+	ret = msc_buffer_unlocked_free_unless_used(msc);
+	mutex_unlock(&msc->buf_mutex);
+
+	return ret;
+}
+
+/**
+ * msc_buffer_get_page() - get MSC buffer page at a given offset
+ * @msc:	MSC device
+ * @pgoff:	page offset into the storage buffer
+ *
+ * This traverses msc::win_list, so holding msc::buf_mutex is expected from
+ * the caller.
+ *
+ * Return:	page if @pgoff corresponds to a valid buffer page or NULL.
+ */
+static struct page *msc_buffer_get_page(struct msc *msc, unsigned long pgoff)
+{
+	struct msc_window *win;
+	struct scatterlist *sg;
+	unsigned int blk;
+
+	if (msc->mode == MSC_MODE_SINGLE)
+		return msc_buffer_contig_get_page(msc, pgoff);
+
+	list_for_each_entry(win, &msc->win_list, entry)
+		if (pgoff >= win->pgoff && pgoff < win->pgoff + win->nr_blocks)
+			goto found;
+
+	return NULL;
+
+found:
+	pgoff -= win->pgoff;
+
+	for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) {
+		struct page *page = msc_sg_page(sg);
+		size_t pgsz = PFN_DOWN(sg->length);
+
+		if (pgoff < pgsz)
+			return page + pgoff;
+
+		pgoff -= pgsz;
+	}
+
+	return NULL;
+}
+
+/**
+ * struct msc_win_to_user_struct - data for copy_to_user() callback
+ * @buf:	userspace buffer to copy data to
+ * @offset:	running offset
+ */
+struct msc_win_to_user_struct {
+	char __user	*buf;
+	unsigned long	offset;
+};
+
+/**
+ * msc_win_to_user() - iterator for msc_buffer_iterate() to copy data to user
+ * @data:	callback's private data
+ * @src:	source buffer
+ * @len:	amount of data to copy from the source buffer
+ */
+static unsigned long msc_win_to_user(void *data, void *src, size_t len)
+{
+	struct msc_win_to_user_struct *u = data;
+	unsigned long ret;
+
+	ret = copy_to_user(u->buf + u->offset, src, len);
+	u->offset += len - ret;
+
+	return ret;
+}
+
+
+/*
+ * file operations' callbacks
+ */
+
+static int intel_th_msc_open(struct inode *inode, struct file *file)
+{
+	struct intel_th_device *thdev = file->private_data;
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	struct msc_iter *iter;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	iter = msc_iter_install(msc);
+	if (IS_ERR(iter))
+		return PTR_ERR(iter);
+
+	file->private_data = iter;
+
+	return nonseekable_open(inode, file);
+}
+
+static int intel_th_msc_release(struct inode *inode, struct file *file)
+{
+	struct msc_iter *iter = file->private_data;
+	struct msc *msc = iter->msc;
+
+	msc_iter_remove(iter, msc);
+
+	return 0;
+}
+
+static ssize_t
+msc_single_to_user(struct msc *msc, char __user *buf, loff_t off, size_t len)
+{
+	unsigned long size = msc->nr_pages << PAGE_SHIFT, rem = len;
+	unsigned long start = off, tocopy = 0;
+
+	if (msc->single_wrap) {
+		start += msc->single_sz;
+		if (start < size) {
+			tocopy = min(rem, size - start);
+			if (copy_to_user(buf, msc->base + start, tocopy))
+				return -EFAULT;
+
+			buf += tocopy;
+			rem -= tocopy;
+			start += tocopy;
+		}
+
+		start &= size - 1;
+		if (rem) {
+			tocopy = min(rem, msc->single_sz - start);
+			if (copy_to_user(buf, msc->base + start, tocopy))
+				return -EFAULT;
+
+			rem -= tocopy;
+		}
+
+		return len - rem;
+	}
+
+	if (copy_to_user(buf, msc->base + start, rem))
+		return -EFAULT;
+
+	return len;
+}
+
+static ssize_t intel_th_msc_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	struct msc_iter *iter = file->private_data;
+	struct msc *msc = iter->msc;
+	size_t size;
+	loff_t off = *ppos;
+	ssize_t ret = 0;
+
+	if (!atomic_inc_unless_negative(&msc->user_count))
+		return 0;
+
+	if (msc->mode == MSC_MODE_SINGLE && !msc->single_wrap)
+		size = msc->single_sz;
+	else
+		size = msc->nr_pages << PAGE_SHIFT;
+
+	if (!size)
+		goto put_count;
+
+	if (off >= size)
+		goto put_count;
+
+	if (off + len >= size)
+		len = size - off;
+
+	if (msc->mode == MSC_MODE_SINGLE) {
+		ret = msc_single_to_user(msc, buf, off, len);
+		if (ret >= 0)
+			*ppos += ret;
+	} else if (msc->mode == MSC_MODE_MULTI) {
+		struct msc_win_to_user_struct u = {
+			.buf	= buf,
+			.offset	= 0,
+		};
+
+		ret = msc_buffer_iterate(iter, len, &u, msc_win_to_user);
+		if (ret >= 0)
+			*ppos = iter->offset;
+	} else {
+		ret = -EINVAL;
+	}
+
+put_count:
+	atomic_dec(&msc->user_count);
+
+	return ret;
+}
+
+/*
+ * vm operations callbacks (vm_ops)
+ */
+
+static void msc_mmap_open(struct vm_area_struct *vma)
+{
+	struct msc_iter *iter = vma->vm_file->private_data;
+	struct msc *msc = iter->msc;
+
+	atomic_inc(&msc->mmap_count);
+}
+
+static void msc_mmap_close(struct vm_area_struct *vma)
+{
+	struct msc_iter *iter = vma->vm_file->private_data;
+	struct msc *msc = iter->msc;
+	unsigned long pg;
+
+	if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex))
+		return;
+
+	/* drop page _refcounts */
+	for (pg = 0; pg < msc->nr_pages; pg++) {
+		struct page *page = msc_buffer_get_page(msc, pg);
+
+		if (WARN_ON_ONCE(!page))
+			continue;
+
+		if (page->mapping)
+			page->mapping = NULL;
+	}
+
+	/* last mapping -- drop user_count */
+	atomic_dec(&msc->user_count);
+	mutex_unlock(&msc->buf_mutex);
+}
+
+static vm_fault_t msc_mmap_fault(struct vm_fault *vmf)
+{
+	struct msc_iter *iter = vmf->vma->vm_file->private_data;
+	struct msc *msc = iter->msc;
+
+	vmf->page = msc_buffer_get_page(msc, vmf->pgoff);
+	if (!vmf->page)
+		return VM_FAULT_SIGBUS;
+
+	get_page(vmf->page);
+	vmf->page->mapping = vmf->vma->vm_file->f_mapping;
+	vmf->page->index = vmf->pgoff;
+
+	return 0;
+}
+
+static const struct vm_operations_struct msc_mmap_ops = {
+	.open	= msc_mmap_open,
+	.close	= msc_mmap_close,
+	.fault	= msc_mmap_fault,
+};
+
+static int intel_th_msc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct msc_iter *iter = vma->vm_file->private_data;
+	struct msc *msc = iter->msc;
+	int ret = -EINVAL;
+
+	if (!size || offset_in_page(size))
+		return -EINVAL;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	/* grab user_count once per mmap; drop in msc_mmap_close() */
+	if (!atomic_inc_unless_negative(&msc->user_count))
+		return -EINVAL;
+
+	if (msc->mode != MSC_MODE_SINGLE &&
+	    msc->mode != MSC_MODE_MULTI)
+		goto out;
+
+	if (size >> PAGE_SHIFT != msc->nr_pages)
+		goto out;
+
+	atomic_set(&msc->mmap_count, 1);
+	ret = 0;
+
+out:
+	if (ret)
+		atomic_dec(&msc->user_count);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY);
+	vma->vm_ops = &msc_mmap_ops;
+	return ret;
+}
+
+static const struct file_operations intel_th_msc_fops = {
+	.open		= intel_th_msc_open,
+	.release	= intel_th_msc_release,
+	.read		= intel_th_msc_read,
+	.mmap		= intel_th_msc_mmap,
+	.llseek		= no_llseek,
+	.owner		= THIS_MODULE,
+};
+
+static void intel_th_msc_wait_empty(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	unsigned long count;
+	u32 reg;
+
+	for (reg = 0, count = MSC_PLE_WAITLOOP_DEPTH;
+	     count && !(reg & MSCSTS_PLE); count--) {
+		reg = __raw_readl(msc->reg_base + REG_MSU_MSC0STS);
+		cpu_relax();
+	}
+
+	if (!count)
+		dev_dbg(msc_dev(msc), "timeout waiting for MSC0 PLE\n");
+}
+
+static int intel_th_msc_init(struct msc *msc)
+{
+	atomic_set(&msc->user_count, -1);
+
+	msc->mode = msc->multi_is_broken ? MSC_MODE_SINGLE : MSC_MODE_MULTI;
+	mutex_init(&msc->buf_mutex);
+	INIT_LIST_HEAD(&msc->win_list);
+	INIT_LIST_HEAD(&msc->iter_list);
+
+	msc->burst_len =
+		(ioread32(msc->reg_base + REG_MSU_MSC0CTL) & MSC_LEN) >>
+		__ffs(MSC_LEN);
+
+	return 0;
+}
+
+static int msc_win_switch(struct msc *msc)
+{
+	struct msc_window *first;
+
+	if (list_empty(&msc->win_list))
+		return -EINVAL;
+
+	first = list_first_entry(&msc->win_list, struct msc_window, entry);
+
+	if (msc_is_last_win(msc->cur_win))
+		msc->cur_win = first;
+	else
+		msc->cur_win = list_next_entry(msc->cur_win, entry);
+
+	msc->base = msc_win_base(msc->cur_win);
+	msc->base_addr = msc_win_base_dma(msc->cur_win);
+
+	intel_th_trace_switch(msc->thdev);
+
+	return 0;
+}
+
+/**
+ * intel_th_msc_window_unlock - put the window back in rotation
+ * @dev:	MSC device to which this relates
+ * @sgt:	buffer's sg_table for the window, does nothing if NULL
+ */
+void intel_th_msc_window_unlock(struct device *dev, struct sg_table *sgt)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	struct msc_window *win;
+
+	if (!sgt)
+		return;
+
+	win = msc_find_window(msc, sgt, false);
+	if (!win)
+		return;
+
+	msc_win_set_lockout(win, WIN_LOCKED, WIN_READY);
+	if (msc->switch_on_unlock == win) {
+		msc->switch_on_unlock = NULL;
+		msc_win_switch(msc);
+	}
+}
+EXPORT_SYMBOL_GPL(intel_th_msc_window_unlock);
+
+static void msc_work(struct work_struct *work)
+{
+	struct msc *msc = container_of(work, struct msc, work);
+
+	intel_th_msc_deactivate(msc->thdev);
+}
+
+static irqreturn_t intel_th_msc_interrupt(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	u32 msusts = ioread32(msc->msu_base + REG_MSU_MSUSTS);
+	u32 mask = msc->index ? MSUSTS_MSC1BLAST : MSUSTS_MSC0BLAST;
+	struct msc_window *win, *next_win;
+
+	if (!msc->do_irq || !msc->mbuf)
+		return IRQ_NONE;
+
+	msusts &= mask;
+
+	if (!msusts)
+		return msc->enabled ? IRQ_HANDLED : IRQ_NONE;
+
+	iowrite32(msusts, msc->msu_base + REG_MSU_MSUSTS);
+
+	if (!msc->enabled)
+		return IRQ_NONE;
+
+	/* grab the window before we do the switch */
+	win = msc->cur_win;
+	if (!win)
+		return IRQ_HANDLED;
+	next_win = msc_next_window(win);
+	if (!next_win)
+		return IRQ_HANDLED;
+
+	/* next window: if READY, proceed, if LOCKED, stop the trace */
+	if (msc_win_set_lockout(next_win, WIN_READY, WIN_INUSE)) {
+		if (msc->stop_on_full)
+			schedule_work(&msc->work);
+		else
+			msc->switch_on_unlock = next_win;
+
+		return IRQ_HANDLED;
+	}
+
+	/* current window: INUSE -> LOCKED */
+	msc_win_set_lockout(win, WIN_INUSE, WIN_LOCKED);
+
+	msc_win_switch(msc);
+
+	if (msc->mbuf && msc->mbuf->ready)
+		msc->mbuf->ready(msc->mbuf_priv, win->sgt,
+				 msc_win_total_sz(win));
+
+	return IRQ_HANDLED;
+}
+
+static const char * const msc_mode[] = {
+	[MSC_MODE_SINGLE]	= "single",
+	[MSC_MODE_MULTI]	= "multi",
+	[MSC_MODE_EXI]		= "ExI",
+	[MSC_MODE_DEBUG]	= "debug",
+};
+
+static ssize_t
+wrap_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", msc->wrap);
+}
+
+static ssize_t
+wrap_store(struct device *dev, struct device_attribute *attr, const char *buf,
+	   size_t size)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	msc->wrap = !!val;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(wrap);
+
+static void msc_buffer_unassign(struct msc *msc)
+{
+	lockdep_assert_held(&msc->buf_mutex);
+
+	if (!msc->mbuf)
+		return;
+
+	msc->mbuf->unassign(msc->mbuf_priv);
+	msu_buffer_put(msc->mbuf);
+	msc->mbuf_priv = NULL;
+	msc->mbuf = NULL;
+}
+
+static ssize_t
+mode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	const char *mode = msc_mode[msc->mode];
+	ssize_t ret;
+
+	mutex_lock(&msc->buf_mutex);
+	if (msc->mbuf)
+		mode = msc->mbuf->name;
+	ret = scnprintf(buf, PAGE_SIZE, "%s\n", mode);
+	mutex_unlock(&msc->buf_mutex);
+
+	return ret;
+}
+
+static ssize_t
+mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
+	   size_t size)
+{
+	const struct msu_buffer *mbuf = NULL;
+	struct msc *msc = dev_get_drvdata(dev);
+	size_t len = size;
+	char *cp, *mode;
+	int i, ret;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	cp = memchr(buf, '\n', len);
+	if (cp)
+		len = cp - buf;
+
+	mode = kstrndup(buf, len, GFP_KERNEL);
+	if (!mode)
+		return -ENOMEM;
+
+	i = match_string(msc_mode, ARRAY_SIZE(msc_mode), mode);
+	if (i >= 0) {
+		kfree(mode);
+		goto found;
+	}
+
+	/* Buffer sinks only work with a usable IRQ */
+	if (!msc->do_irq) {
+		kfree(mode);
+		return -EINVAL;
+	}
+
+	mbuf = msu_buffer_get(mode);
+	kfree(mode);
+	if (mbuf)
+		goto found;
+
+	return -EINVAL;
+
+found:
+	if (i == MSC_MODE_MULTI && msc->multi_is_broken)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&msc->buf_mutex);
+	ret = 0;
+
+	/* Same buffer: do nothing */
+	if (mbuf && mbuf == msc->mbuf) {
+		/* put the extra reference we just got */
+		msu_buffer_put(mbuf);
+		goto unlock;
+	}
+
+	ret = msc_buffer_unlocked_free_unless_used(msc);
+	if (ret)
+		goto unlock;
+
+	if (mbuf) {
+		void *mbuf_priv = mbuf->assign(dev, &i);
+
+		if (!mbuf_priv) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+
+		msc_buffer_unassign(msc);
+		msc->mbuf_priv = mbuf_priv;
+		msc->mbuf = mbuf;
+	} else {
+		msc_buffer_unassign(msc);
+	}
+
+	msc->mode = i;
+
+unlock:
+	if (ret && mbuf)
+		msu_buffer_put(mbuf);
+	mutex_unlock(&msc->buf_mutex);
+
+	return ret ? ret : size;
+}
+
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t
+nr_pages_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	struct msc_window *win;
+	size_t count = 0;
+
+	mutex_lock(&msc->buf_mutex);
+
+	if (msc->mode == MSC_MODE_SINGLE)
+		count = scnprintf(buf, PAGE_SIZE, "%ld\n", msc->nr_pages);
+	else if (msc->mode == MSC_MODE_MULTI) {
+		list_for_each_entry(win, &msc->win_list, entry) {
+			count += scnprintf(buf + count, PAGE_SIZE - count,
+					   "%d%c", win->nr_blocks,
+					   msc_is_last_win(win) ? '\n' : ',');
+		}
+	} else {
+		count = scnprintf(buf, PAGE_SIZE, "unsupported\n");
+	}
+
+	mutex_unlock(&msc->buf_mutex);
+
+	return count;
+}
+
+static ssize_t
+nr_pages_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t size)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	unsigned long val, *win = NULL, *rewin;
+	size_t len = size;
+	const char *p = buf;
+	char *end, *s;
+	int ret, nr_wins = 0;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	ret = msc_buffer_free_unless_used(msc);
+	if (ret)
+		return ret;
+
+	/* scan the comma-separated list of allocation sizes */
+	end = memchr(buf, '\n', len);
+	if (end)
+		len = end - buf;
+
+	do {
+		end = memchr(p, ',', len);
+		s = kstrndup(p, end ? end - p : len, GFP_KERNEL);
+		if (!s) {
+			ret = -ENOMEM;
+			goto free_win;
+		}
+
+		ret = kstrtoul(s, 10, &val);
+		kfree(s);
+
+		if (ret || !val)
+			goto free_win;
+
+		if (nr_wins && msc->mode == MSC_MODE_SINGLE) {
+			ret = -EINVAL;
+			goto free_win;
+		}
+
+		nr_wins++;
+		rewin = krealloc_array(win, nr_wins, sizeof(*win), GFP_KERNEL);
+		if (!rewin) {
+			kfree(win);
+			return -ENOMEM;
+		}
+
+		win = rewin;
+		win[nr_wins - 1] = val;
+
+		if (!end)
+			break;
+
+		/* consume the number and the following comma, hence +1 */
+		len -= end - p + 1;
+		p = end + 1;
+	} while (len);
+
+	mutex_lock(&msc->buf_mutex);
+	ret = msc_buffer_alloc(msc, win, nr_wins);
+	mutex_unlock(&msc->buf_mutex);
+
+free_win:
+	kfree(win);
+
+	return ret ? ret : size;
+}
+
+static DEVICE_ATTR_RW(nr_pages);
+
+static ssize_t
+win_switch_store(struct device *dev, struct device_attribute *attr,
+		 const char *buf, size_t size)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val != 1)
+		return -EINVAL;
+
+	ret = -EINVAL;
+	mutex_lock(&msc->buf_mutex);
+	/*
+	 * Window switch can only happen in the "multi" mode.
+	 * If a external buffer is engaged, they have the full
+	 * control over window switching.
+	 */
+	if (msc->mode == MSC_MODE_MULTI && !msc->mbuf)
+		ret = msc_win_switch(msc);
+	mutex_unlock(&msc->buf_mutex);
+
+	return ret ? ret : size;
+}
+
+static DEVICE_ATTR_WO(win_switch);
+
+static ssize_t stop_on_full_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", msc->stop_on_full);
+}
+
+static ssize_t stop_on_full_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	int ret;
+
+	ret = kstrtobool(buf, &msc->stop_on_full);
+	if (ret)
+		return ret;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(stop_on_full);
+
+static struct attribute *msc_output_attrs[] = {
+	&dev_attr_wrap.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_nr_pages.attr,
+	&dev_attr_win_switch.attr,
+	&dev_attr_stop_on_full.attr,
+	NULL,
+};
+
+static const struct attribute_group msc_output_group = {
+	.attrs	= msc_output_attrs,
+};
+
+static int intel_th_msc_probe(struct intel_th_device *thdev)
+{
+	struct device *dev = &thdev->dev;
+	struct resource *res;
+	struct msc *msc;
+	void __iomem *base;
+	int err;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	msc = devm_kzalloc(dev, sizeof(*msc), GFP_KERNEL);
+	if (!msc)
+		return -ENOMEM;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_IRQ, 1);
+	if (!res)
+		msc->do_irq = 1;
+
+	if (INTEL_TH_CAP(to_intel_th(thdev), multi_is_broken))
+		msc->multi_is_broken = 1;
+
+	msc->index = thdev->id;
+
+	msc->thdev = thdev;
+	msc->reg_base = base + msc->index * 0x100;
+	msc->msu_base = base;
+
+	INIT_WORK(&msc->work, msc_work);
+	err = intel_th_msc_init(msc);
+	if (err)
+		return err;
+
+	dev_set_drvdata(dev, msc);
+
+	return 0;
+}
+
+static void intel_th_msc_remove(struct intel_th_device *thdev)
+{
+	struct msc *msc = dev_get_drvdata(&thdev->dev);
+	int ret;
+
+	intel_th_msc_deactivate(thdev);
+
+	/*
+	 * Buffers should not be used at this point except if the
+	 * output character device is still open and the parent
+	 * device gets detached from its bus, which is a FIXME.
+	 */
+	ret = msc_buffer_free_unless_used(msc);
+	WARN_ON_ONCE(ret);
+}
+
+static struct intel_th_driver intel_th_msc_driver = {
+	.probe	= intel_th_msc_probe,
+	.remove	= intel_th_msc_remove,
+	.irq		= intel_th_msc_interrupt,
+	.wait_empty	= intel_th_msc_wait_empty,
+	.activate	= intel_th_msc_activate,
+	.deactivate	= intel_th_msc_deactivate,
+	.fops	= &intel_th_msc_fops,
+	.attr_group	= &msc_output_group,
+	.driver	= {
+		.name	= "msc",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_driver(intel_th_msc_driver,
+	      intel_th_driver_register,
+	      intel_th_driver_unregister);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub Memory Storage Unit driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/intel_th/msu.h b/drivers/hwtracing/intel_th/msu.h
new file mode 100644
index 0000000000..e771f509bd
--- /dev/null
+++ b/drivers/hwtracing/intel_th/msu.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub Memory Storage Unit (MSU) data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_MSU_H__
+#define __INTEL_TH_MSU_H__
+
+enum {
+	REG_MSU_MSUPARAMS	= 0x0000,
+	REG_MSU_MSUSTS		= 0x0008,
+	REG_MSU_MINTCTL		= 0x0004, /* MSU-global interrupt control */
+	REG_MSU_MSC0CTL		= 0x0100, /* MSC0 control */
+	REG_MSU_MSC0STS		= 0x0104, /* MSC0 status */
+	REG_MSU_MSC0BAR		= 0x0108, /* MSC0 output base address */
+	REG_MSU_MSC0SIZE	= 0x010c, /* MSC0 output size */
+	REG_MSU_MSC0MWP		= 0x0110, /* MSC0 write pointer */
+	REG_MSU_MSC0NWSA	= 0x011c, /* MSC0 next window start address */
+
+	REG_MSU_MSC1CTL		= 0x0200, /* MSC1 control */
+	REG_MSU_MSC1STS		= 0x0204, /* MSC1 status */
+	REG_MSU_MSC1BAR		= 0x0208, /* MSC1 output base address */
+	REG_MSU_MSC1SIZE	= 0x020c, /* MSC1 output size */
+	REG_MSU_MSC1MWP		= 0x0210, /* MSC1 write pointer */
+	REG_MSU_MSC1NWSA	= 0x021c, /* MSC1 next window start address */
+};
+
+/* MSUSTS bits */
+#define MSUSTS_MSU_INT	BIT(0)
+#define MSUSTS_MSC0BLAST	BIT(16)
+#define MSUSTS_MSC1BLAST	BIT(24)
+
+/* MSCnCTL bits */
+#define MSC_EN		BIT(0)
+#define MSC_WRAPEN	BIT(1)
+#define MSC_RD_HDR_OVRD	BIT(2)
+#define MSC_MODE	(BIT(4) | BIT(5))
+#define MSC_LEN		(BIT(8) | BIT(9) | BIT(10))
+
+/* MINTCTL bits */
+#define MICDE		BIT(0)
+#define M0BLIE		BIT(16)
+#define M1BLIE		BIT(24)
+
+/* MSCnSTS bits */
+#define MSCSTS_WRAPSTAT	BIT(1)	/* Wrap occurred */
+#define MSCSTS_PLE	BIT(2)	/* Pipeline Empty */
+
+/*
+ * Multiblock/multiwindow block descriptor
+ */
+struct msc_block_desc {
+	u32	sw_tag;
+	u32	block_sz;
+	u32	next_blk;
+	u32	next_win;
+	u32	res0[4];
+	u32	hw_tag;
+	u32	valid_dw;
+	u32	ts_low;
+	u32	ts_high;
+	u32	res1[4];
+} __packed;
+
+#define MSC_BDESC	sizeof(struct msc_block_desc)
+#define DATA_IN_PAGE	(PAGE_SIZE - MSC_BDESC)
+
+/* MSC multiblock sw tag bits */
+#define MSC_SW_TAG_LASTBLK	BIT(0)
+#define MSC_SW_TAG_LASTWIN	BIT(1)
+
+/* MSC multiblock hw tag bits */
+#define MSC_HW_TAG_TRIGGER	BIT(0)
+#define MSC_HW_TAG_BLOCKWRAP	BIT(1)
+#define MSC_HW_TAG_WINWRAP	BIT(2)
+#define MSC_HW_TAG_ENDBIT	BIT(3)
+
+static inline unsigned long msc_data_sz(struct msc_block_desc *bdesc)
+{
+	if (!bdesc->valid_dw)
+		return 0;
+
+	return bdesc->valid_dw * 4 - MSC_BDESC;
+}
+
+static inline unsigned long msc_total_sz(struct msc_block_desc *bdesc)
+{
+	return bdesc->valid_dw * 4;
+}
+
+static inline unsigned long msc_block_sz(struct msc_block_desc *bdesc)
+{
+	return bdesc->block_sz * 64 - MSC_BDESC;
+}
+
+static inline bool msc_block_wrapped(struct msc_block_desc *bdesc)
+{
+	if (bdesc->hw_tag & (MSC_HW_TAG_BLOCKWRAP | MSC_HW_TAG_WINWRAP))
+		return true;
+
+	return false;
+}
+
+static inline bool msc_block_last_written(struct msc_block_desc *bdesc)
+{
+	if ((bdesc->hw_tag & MSC_HW_TAG_ENDBIT) ||
+	    (msc_data_sz(bdesc) != msc_block_sz(bdesc)))
+		return true;
+
+	return false;
+}
+
+/* waiting for Pipeline Empty bit(s) to assert for MSC */
+#define MSC_PLE_WAITLOOP_DEPTH	10000
+
+#endif /* __INTEL_TH_MSU_H__ */
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
new file mode 100644
index 0000000000..147d338c19
--- /dev/null
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub pci driver
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/pci.h>
+
+#include "intel_th.h"
+
+#define DRIVER_NAME "intel_th_pci"
+
+enum {
+	TH_PCI_CONFIG_BAR	= 0,
+	TH_PCI_STH_SW_BAR	= 2,
+	TH_PCI_RTIT_BAR		= 4,
+};
+
+#define BAR_MASK (BIT(TH_PCI_CONFIG_BAR) | BIT(TH_PCI_STH_SW_BAR))
+
+#define PCI_REG_NPKDSC	0x80
+#define NPKDSC_TSACT	BIT(5)
+
+static int intel_th_pci_activate(struct intel_th *th)
+{
+	struct pci_dev *pdev = to_pci_dev(th->dev);
+	u32 npkdsc;
+	int err;
+
+	if (!INTEL_TH_CAP(th, tscu_enable))
+		return 0;
+
+	err = pci_read_config_dword(pdev, PCI_REG_NPKDSC, &npkdsc);
+	if (!err) {
+		npkdsc |= NPKDSC_TSACT;
+		err = pci_write_config_dword(pdev, PCI_REG_NPKDSC, npkdsc);
+	}
+
+	if (err)
+		dev_err(&pdev->dev, "failed to read NPKDSC register\n");
+
+	return err;
+}
+
+static void intel_th_pci_deactivate(struct intel_th *th)
+{
+	struct pci_dev *pdev = to_pci_dev(th->dev);
+	u32 npkdsc;
+	int err;
+
+	if (!INTEL_TH_CAP(th, tscu_enable))
+		return;
+
+	err = pci_read_config_dword(pdev, PCI_REG_NPKDSC, &npkdsc);
+	if (!err) {
+		npkdsc |= NPKDSC_TSACT;
+		err = pci_write_config_dword(pdev, PCI_REG_NPKDSC, npkdsc);
+	}
+
+	if (err)
+		dev_err(&pdev->dev, "failed to read NPKDSC register\n");
+}
+
+static int intel_th_pci_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	const struct intel_th_drvdata *drvdata = (void *)id->driver_data;
+	struct resource resource[TH_MMIO_END + TH_NVEC_MAX] = {
+		[TH_MMIO_CONFIG]	= pdev->resource[TH_PCI_CONFIG_BAR],
+		[TH_MMIO_SW]		= pdev->resource[TH_PCI_STH_SW_BAR],
+	};
+	int err, r = TH_MMIO_SW + 1, i;
+	struct intel_th *th;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions_request_all(pdev, BAR_MASK, DRIVER_NAME);
+	if (err)
+		return err;
+
+	if (pdev->resource[TH_PCI_RTIT_BAR].start) {
+		resource[TH_MMIO_RTIT] = pdev->resource[TH_PCI_RTIT_BAR];
+		r++;
+	}
+
+	err = pci_alloc_irq_vectors(pdev, 1, 8, PCI_IRQ_ALL_TYPES);
+	if (err > 0)
+		for (i = 0; i < err; i++, r++) {
+			resource[r].flags = IORESOURCE_IRQ;
+			resource[r].start = pci_irq_vector(pdev, i);
+		}
+
+	th = intel_th_alloc(&pdev->dev, drvdata, resource, r);
+	if (IS_ERR(th)) {
+		err = PTR_ERR(th);
+		goto err_free_irq;
+	}
+
+	th->activate   = intel_th_pci_activate;
+	th->deactivate = intel_th_pci_deactivate;
+
+	pci_set_master(pdev);
+
+	return 0;
+
+err_free_irq:
+	pci_free_irq_vectors(pdev);
+	return err;
+}
+
+static void intel_th_pci_remove(struct pci_dev *pdev)
+{
+	struct intel_th *th = pci_get_drvdata(pdev);
+
+	intel_th_free(th);
+
+	pci_free_irq_vectors(pdev);
+}
+
+static const struct intel_th_drvdata intel_th_1x_multi_is_broken = {
+	.multi_is_broken	= 1,
+};
+
+static const struct intel_th_drvdata intel_th_2x = {
+	.tscu_enable	= 1,
+	.has_mintctl	= 1,
+};
+
+static const struct pci_device_id intel_th_pci_id_table[] = {
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9d26),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa126),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Apollo Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x5a8e),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Broxton */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0a80),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Broxton B-step */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1a8e),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Kaby Lake PCH-H */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6),
+		.driver_data = (kernel_ulong_t)&intel_th_1x_multi_is_broken,
+	},
+	{
+		/* Denverton */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Lewisburg PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa1a6),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Lewisburg PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa226),
+		.driver_data = (kernel_ulong_t)0,
+	},
+	{
+		/* Gemini Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Cannon Lake H */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa326),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Cannon Lake LP */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9da6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Cedar Fork PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x18e1),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Ice Lake PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x34a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Comet Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x02a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Comet Lake PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x06a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Comet Lake PCH-V */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa3a6),
+		.driver_data = (kernel_ulong_t)&intel_th_1x_multi_is_broken,
+	},
+	{
+		/* Ice Lake NNPI */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Ice Lake CPU */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8a29),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Tiger Lake CPU */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9a33),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Tiger Lake PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Tiger Lake PCH-H */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x43a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Jasper Lake PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Jasper Lake CPU */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4e29),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Elkhart Lake CPU */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4529),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Elkhart Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Emmitsburg PCH */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x1bcc),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Alder Lake */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7aa6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Alder Lake-P */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x51a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Alder Lake-M */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x54a6),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Meteor Lake-P */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Raptor Lake-S */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Raptor Lake-S CPU */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa76f),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Alder Lake CPU */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{
+		/* Rocket Lake CPU */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4c19),
+		.driver_data = (kernel_ulong_t)&intel_th_2x,
+	},
+	{ 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, intel_th_pci_id_table);
+
+static struct pci_driver intel_th_pci_driver = {
+	.name		= DRIVER_NAME,
+	.id_table	= intel_th_pci_id_table,
+	.probe		= intel_th_pci_probe,
+	.remove		= intel_th_pci_remove,
+};
+
+module_pci_driver(intel_th_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub PCI controller driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@intel.com>");
diff --git a/drivers/hwtracing/intel_th/pti.c b/drivers/hwtracing/intel_th/pti.c
new file mode 100644
index 0000000000..09132ab8bc
--- /dev/null
+++ b/drivers/hwtracing/intel_th/pti.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub PTI output driver
+ *
+ * Copyright (C) 2014-2016 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sizes.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+
+#include "intel_th.h"
+#include "pti.h"
+
+struct pti_device {
+	void __iomem		*base;
+	struct intel_th_device	*thdev;
+	unsigned int		mode;
+	unsigned int		freeclk;
+	unsigned int		clkdiv;
+	unsigned int		patgen;
+	unsigned int		lpp_dest_mask;
+	unsigned int		lpp_dest;
+};
+
+/* map PTI widths to MODE settings of PTI_CTL register */
+static const unsigned int pti_mode[] = {
+	0, 4, 8, 0, 12, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static int pti_width_mode(unsigned int width)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pti_mode); i++)
+		if (pti_mode[i] == width)
+			return i;
+
+	return -EINVAL;
+}
+
+static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", pti_mode[pti->mode]);
+}
+
+static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	ret = pti_width_mode(val);
+	if (ret < 0)
+		return ret;
+
+	pti->mode = ret;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t
+freerunning_clock_show(struct device *dev, struct device_attribute *attr,
+		       char *buf)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", pti->freeclk);
+}
+
+static ssize_t
+freerunning_clock_store(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	pti->freeclk = !!val;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(freerunning_clock);
+
+static ssize_t
+clock_divider_show(struct device *dev, struct device_attribute *attr,
+		   char *buf)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", 1u << pti->clkdiv);
+}
+
+static ssize_t
+clock_divider_store(struct device *dev, struct device_attribute *attr,
+		    const char *buf, size_t size)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (!is_power_of_2(val) || val > 8 || !val)
+		return -EINVAL;
+
+	pti->clkdiv = val;
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(clock_divider);
+
+static struct attribute *pti_output_attrs[] = {
+	&dev_attr_mode.attr,
+	&dev_attr_freerunning_clock.attr,
+	&dev_attr_clock_divider.attr,
+	NULL,
+};
+
+static const struct attribute_group pti_output_group = {
+	.attrs	= pti_output_attrs,
+};
+
+static int intel_th_pti_activate(struct intel_th_device *thdev)
+{
+	struct pti_device *pti = dev_get_drvdata(&thdev->dev);
+	u32 ctl = PTI_EN;
+
+	if (pti->patgen)
+		ctl |= pti->patgen << __ffs(PTI_PATGENMODE);
+	if (pti->freeclk)
+		ctl |= PTI_FCEN;
+	ctl |= pti->mode << __ffs(PTI_MODE);
+	ctl |= pti->clkdiv << __ffs(PTI_CLKDIV);
+	ctl |= pti->lpp_dest << __ffs(LPP_DEST);
+
+	iowrite32(ctl, pti->base + REG_PTI_CTL);
+
+	intel_th_trace_enable(thdev);
+
+	return 0;
+}
+
+static void intel_th_pti_deactivate(struct intel_th_device *thdev)
+{
+	struct pti_device *pti = dev_get_drvdata(&thdev->dev);
+
+	intel_th_trace_disable(thdev);
+
+	iowrite32(0, pti->base + REG_PTI_CTL);
+}
+
+static void read_hw_config(struct pti_device *pti)
+{
+	u32 ctl = ioread32(pti->base + REG_PTI_CTL);
+
+	pti->mode	= (ctl & PTI_MODE) >> __ffs(PTI_MODE);
+	pti->clkdiv	= (ctl & PTI_CLKDIV) >> __ffs(PTI_CLKDIV);
+	pti->freeclk	= !!(ctl & PTI_FCEN);
+
+	if (!pti_mode[pti->mode])
+		pti->mode = pti_width_mode(4);
+	if (!pti->clkdiv)
+		pti->clkdiv = 1;
+
+	if (pti->thdev->output.type == GTH_LPP) {
+		if (ctl & LPP_PTIPRESENT)
+			pti->lpp_dest_mask |= LPP_DEST_PTI;
+		if (ctl & LPP_BSSBPRESENT)
+			pti->lpp_dest_mask |= LPP_DEST_EXI;
+		if (ctl & LPP_DEST)
+			pti->lpp_dest = 1;
+	}
+}
+
+static int intel_th_pti_probe(struct intel_th_device *thdev)
+{
+	struct device *dev = &thdev->dev;
+	struct resource *res;
+	struct pti_device *pti;
+	void __iomem *base;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	pti = devm_kzalloc(dev, sizeof(*pti), GFP_KERNEL);
+	if (!pti)
+		return -ENOMEM;
+
+	pti->thdev = thdev;
+	pti->base = base;
+
+	read_hw_config(pti);
+
+	dev_set_drvdata(dev, pti);
+
+	return 0;
+}
+
+static void intel_th_pti_remove(struct intel_th_device *thdev)
+{
+}
+
+static struct intel_th_driver intel_th_pti_driver = {
+	.probe	= intel_th_pti_probe,
+	.remove	= intel_th_pti_remove,
+	.activate	= intel_th_pti_activate,
+	.deactivate	= intel_th_pti_deactivate,
+	.attr_group	= &pti_output_group,
+	.driver	= {
+		.name	= "pti",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static const char * const lpp_dest_str[] = { "pti", "exi" };
+
+static ssize_t lpp_dest_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	ssize_t ret = 0;
+	int i;
+
+	for (i = ARRAY_SIZE(lpp_dest_str) - 1; i >= 0; i--) {
+		const char *fmt = pti->lpp_dest == i ? "[%s] " : "%s ";
+
+		if (!(pti->lpp_dest_mask & BIT(i)))
+			continue;
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				 fmt, lpp_dest_str[i]);
+	}
+
+	if (ret)
+		buf[ret - 1] = '\n';
+
+	return ret;
+}
+
+static ssize_t lpp_dest_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	struct pti_device *pti = dev_get_drvdata(dev);
+	int i;
+
+	i = sysfs_match_string(lpp_dest_str, buf);
+	if (i < 0)
+		return i;
+
+	if (!(pti->lpp_dest_mask & BIT(i)))
+		return -EINVAL;
+
+	pti->lpp_dest = i;
+	return size;
+}
+
+static DEVICE_ATTR_RW(lpp_dest);
+
+static struct attribute *lpp_output_attrs[] = {
+	&dev_attr_mode.attr,
+	&dev_attr_freerunning_clock.attr,
+	&dev_attr_clock_divider.attr,
+	&dev_attr_lpp_dest.attr,
+	NULL,
+};
+
+static const struct attribute_group lpp_output_group = {
+	.attrs	= lpp_output_attrs,
+};
+
+static struct intel_th_driver intel_th_lpp_driver = {
+	.probe		= intel_th_pti_probe,
+	.remove		= intel_th_pti_remove,
+	.activate	= intel_th_pti_activate,
+	.deactivate	= intel_th_pti_deactivate,
+	.attr_group	= &lpp_output_group,
+	.driver	= {
+		.name	= "lpp",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init intel_th_pti_lpp_init(void)
+{
+	int err;
+
+	err = intel_th_driver_register(&intel_th_pti_driver);
+	if (err)
+		return err;
+
+	err = intel_th_driver_register(&intel_th_lpp_driver);
+	if (err) {
+		intel_th_driver_unregister(&intel_th_pti_driver);
+		return err;
+	}
+
+	return 0;
+}
+
+module_init(intel_th_pti_lpp_init);
+
+static void __exit intel_th_pti_lpp_exit(void)
+{
+	intel_th_driver_unregister(&intel_th_pti_driver);
+	intel_th_driver_unregister(&intel_th_lpp_driver);
+}
+
+module_exit(intel_th_pti_lpp_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub PTI/LPP output driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/intel_th/pti.h b/drivers/hwtracing/intel_th/pti.h
new file mode 100644
index 0000000000..7dfc043133
--- /dev/null
+++ b/drivers/hwtracing/intel_th/pti.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub PTI output data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_STH_H__
+#define __INTEL_TH_STH_H__
+
+enum {
+	REG_PTI_CTL	= 0x1c00,
+};
+
+#define PTI_EN		BIT(0)
+#define PTI_FCEN	BIT(1)
+#define PTI_MODE	0xf0
+#define LPP_PTIPRESENT	BIT(8)
+#define LPP_BSSBPRESENT	BIT(9)
+#define PTI_CLKDIV	0x000f0000
+#define PTI_PATGENMODE	0x00f00000
+#define LPP_DEST	BIT(25)
+#define LPP_BSSBACT	BIT(30)
+#define LPP_LPPBUSY	BIT(31)
+
+#define LPP_DEST_PTI	BIT(0)
+#define LPP_DEST_EXI	BIT(1)
+
+#endif /* __INTEL_TH_STH_H__ */
diff --git a/drivers/hwtracing/intel_th/sth.c b/drivers/hwtracing/intel_th/sth.c
new file mode 100644
index 0000000000..9ca8c4e045
--- /dev/null
+++ b/drivers/hwtracing/intel_th/sth.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel(R) Trace Hub Software Trace Hub support
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+
+#include "intel_th.h"
+#include "sth.h"
+
+struct sth_device {
+	void __iomem	*base;
+	void __iomem	*channels;
+	phys_addr_t	channels_phys;
+	struct device	*dev;
+	struct stm_data	stm;
+	unsigned int	sw_nmasters;
+};
+
+static struct intel_th_channel __iomem *
+sth_channel(struct sth_device *sth, unsigned int master, unsigned int channel)
+{
+	struct intel_th_channel __iomem *sw_map = sth->channels;
+
+	return &sw_map[(master - sth->stm.sw_start) * sth->stm.sw_nchannels +
+		       channel];
+}
+
+static void sth_iowrite(void __iomem *dest, const unsigned char *payload,
+			unsigned int size)
+{
+	switch (size) {
+#ifdef CONFIG_64BIT
+	case 8:
+		writeq_relaxed(*(u64 *)payload, dest);
+		break;
+#endif
+	case 4:
+		writel_relaxed(*(u32 *)payload, dest);
+		break;
+	case 2:
+		writew_relaxed(*(u16 *)payload, dest);
+		break;
+	case 1:
+		writeb_relaxed(*(u8 *)payload, dest);
+		break;
+	default:
+		break;
+	}
+}
+
+static ssize_t notrace sth_stm_packet(struct stm_data *stm_data,
+				      unsigned int master,
+				      unsigned int channel,
+				      unsigned int packet,
+				      unsigned int flags,
+				      unsigned int size,
+				      const unsigned char *payload)
+{
+	struct sth_device *sth = container_of(stm_data, struct sth_device, stm);
+	struct intel_th_channel __iomem *out =
+		sth_channel(sth, master, channel);
+	u64 __iomem *outp = &out->Dn;
+	unsigned long reg = REG_STH_TRIG;
+
+#ifndef CONFIG_64BIT
+	if (size > 4)
+		size = 4;
+#endif
+
+	size = rounddown_pow_of_two(size);
+
+	switch (packet) {
+	/* Global packets (GERR, XSYNC, TRIG) are sent with register writes */
+	case STP_PACKET_GERR:
+		reg += 4;
+		fallthrough;
+
+	case STP_PACKET_XSYNC:
+		reg += 8;
+		fallthrough;
+
+	case STP_PACKET_TRIG:
+		if (flags & STP_PACKET_TIMESTAMPED)
+			reg += 4;
+		writeb_relaxed(*payload, sth->base + reg);
+		break;
+
+	case STP_PACKET_MERR:
+		if (size > 4)
+			size = 4;
+
+		sth_iowrite(&out->MERR, payload, size);
+		break;
+
+	case STP_PACKET_FLAG:
+		if (flags & STP_PACKET_TIMESTAMPED)
+			outp = (u64 __iomem *)&out->FLAG_TS;
+		else
+			outp = (u64 __iomem *)&out->FLAG;
+
+		size = 0;
+		writeb_relaxed(0, outp);
+		break;
+
+	case STP_PACKET_USER:
+		if (flags & STP_PACKET_TIMESTAMPED)
+			outp = &out->USER_TS;
+		else
+			outp = &out->USER;
+		sth_iowrite(outp, payload, size);
+		break;
+
+	case STP_PACKET_DATA:
+		outp = &out->Dn;
+
+		if (flags & STP_PACKET_TIMESTAMPED)
+			outp += 2;
+		if (flags & STP_PACKET_MARKED)
+			outp++;
+
+		sth_iowrite(outp, payload, size);
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	return size;
+}
+
+static phys_addr_t
+sth_stm_mmio_addr(struct stm_data *stm_data, unsigned int master,
+		  unsigned int channel, unsigned int nr_chans)
+{
+	struct sth_device *sth = container_of(stm_data, struct sth_device, stm);
+	phys_addr_t addr;
+
+	master -= sth->stm.sw_start;
+	addr = sth->channels_phys + (master * sth->stm.sw_nchannels + channel) *
+		sizeof(struct intel_th_channel);
+
+	if (offset_in_page(addr) ||
+	    offset_in_page(nr_chans * sizeof(struct intel_th_channel)))
+		return 0;
+
+	return addr;
+}
+
+static int sth_stm_link(struct stm_data *stm_data, unsigned int master,
+			 unsigned int channel)
+{
+	struct sth_device *sth = container_of(stm_data, struct sth_device, stm);
+
+	return intel_th_set_output(to_intel_th_device(sth->dev), master);
+}
+
+static int intel_th_sw_init(struct sth_device *sth)
+{
+	u32 reg;
+
+	reg = ioread32(sth->base + REG_STH_STHCAP1);
+	sth->stm.sw_nchannels = reg & 0xff;
+
+	reg = ioread32(sth->base + REG_STH_STHCAP0);
+	sth->stm.sw_start = reg & 0xffff;
+	sth->stm.sw_end = reg >> 16;
+
+	sth->sw_nmasters = sth->stm.sw_end - sth->stm.sw_start;
+	dev_dbg(sth->dev, "sw_start: %x sw_end: %x masters: %x nchannels: %x\n",
+		sth->stm.sw_start, sth->stm.sw_end, sth->sw_nmasters,
+		sth->stm.sw_nchannels);
+
+	return 0;
+}
+
+static int intel_th_sth_probe(struct intel_th_device *thdev)
+{
+	struct device *dev = &thdev->dev;
+	struct sth_device *sth;
+	struct resource *res;
+	void __iomem *base, *channels;
+	int err;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 1);
+	if (!res)
+		return -ENODEV;
+
+	channels = devm_ioremap(dev, res->start, resource_size(res));
+	if (!channels)
+		return -ENOMEM;
+
+	sth = devm_kzalloc(dev, sizeof(*sth), GFP_KERNEL);
+	if (!sth)
+		return -ENOMEM;
+
+	sth->dev = dev;
+	sth->base = base;
+	sth->channels = channels;
+	sth->channels_phys = res->start;
+	sth->stm.name = dev_name(dev);
+	sth->stm.packet = sth_stm_packet;
+	sth->stm.mmio_addr = sth_stm_mmio_addr;
+	sth->stm.sw_mmiosz = sizeof(struct intel_th_channel);
+	sth->stm.link = sth_stm_link;
+
+	err = intel_th_sw_init(sth);
+	if (err)
+		return err;
+
+	err = stm_register_device(dev, &sth->stm, THIS_MODULE);
+	if (err) {
+		dev_err(dev, "stm_register_device failed\n");
+		return err;
+	}
+
+	dev_set_drvdata(dev, sth);
+
+	return 0;
+}
+
+static void intel_th_sth_remove(struct intel_th_device *thdev)
+{
+	struct sth_device *sth = dev_get_drvdata(&thdev->dev);
+
+	stm_unregister_device(&sth->stm);
+}
+
+static struct intel_th_driver intel_th_sth_driver = {
+	.probe	= intel_th_sth_probe,
+	.remove	= intel_th_sth_remove,
+	.driver	= {
+		.name	= "sth",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_driver(intel_th_sth_driver,
+	      intel_th_driver_register,
+	      intel_th_driver_unregister);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel(R) Trace Hub Software Trace Hub driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@intel.com>");
diff --git a/drivers/hwtracing/intel_th/sth.h b/drivers/hwtracing/intel_th/sth.h
new file mode 100644
index 0000000000..f97fc0c517
--- /dev/null
+++ b/drivers/hwtracing/intel_th/sth.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub Software Trace Hub (STH) data structures
+ *
+ * Copyright (C) 2014-2015 Intel Corporation.
+ */
+
+#ifndef __INTEL_TH_STH_H__
+#define __INTEL_TH_STH_H__
+
+enum {
+	REG_STH_STHCAP0		= 0x0000, /* capabilities pt1 */
+	REG_STH_STHCAP1		= 0x0004, /* capabilities pt2 */
+	REG_STH_TRIG		= 0x0008, /* TRIG packet payload */
+	REG_STH_TRIG_TS		= 0x000c, /* TRIG_TS packet payload */
+	REG_STH_XSYNC		= 0x0010, /* XSYNC packet payload */
+	REG_STH_XSYNC_TS	= 0x0014, /* XSYNC_TS packet payload */
+	REG_STH_GERR		= 0x0018, /* GERR packet payload */
+};
+
+struct intel_th_channel {
+	u64	Dn;
+	u64	DnM;
+	u64	DnTS;
+	u64	DnMTS;
+	u64	USER;
+	u64	USER_TS;
+	u32	FLAG;
+	u32	FLAG_TS;
+	u32	MERR;
+	u32	__unused;
+} __packed;
+
+#endif /* __INTEL_TH_STH_H__ */
diff --git a/drivers/hwtracing/ptt/Kconfig b/drivers/hwtracing/ptt/Kconfig
new file mode 100644
index 0000000000..6d46a09ffe
--- /dev/null
+++ b/drivers/hwtracing/ptt/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HISI_PTT
+	tristate "HiSilicon PCIe Tune and Trace Device"
+	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	depends on PCI && HAS_DMA && HAS_IOMEM && PERF_EVENTS
+	help
+	  HiSilicon PCIe Tune and Trace device exists as a PCIe RCiEP
+	  device, and it provides support for PCIe traffic tuning and
+	  tracing TLP headers to the memory.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called hisi_ptt.
diff --git a/drivers/hwtracing/ptt/Makefile b/drivers/hwtracing/ptt/Makefile
new file mode 100644
index 0000000000..908c09a981
--- /dev/null
+++ b/drivers/hwtracing/ptt/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_HISI_PTT) += hisi_ptt.o
diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c
new file mode 100644
index 0000000000..a991ecb751
--- /dev/null
+++ b/drivers/hwtracing/ptt/hisi_ptt.c
@@ -0,0 +1,1438 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for HiSilicon PCIe tune and trace device
+ *
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ * Author: Yicong Yang <yangyicong@hisilicon.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
+
+#include "hisi_ptt.h"
+
+/* Dynamic CPU hotplug state used by PTT */
+static enum cpuhp_state hisi_ptt_pmu_online;
+
+static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
+{
+	u32 val;
+
+	return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
+				   val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
+				   HISI_PTT_WAIT_POLL_INTERVAL_US,
+				   HISI_PTT_WAIT_TUNE_TIMEOUT_US);
+}
+
+static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
+	struct dev_ext_attribute *ext_attr;
+	struct hisi_ptt_tune_desc *desc;
+	u32 reg;
+	u16 val;
+
+	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
+	desc = ext_attr->var;
+
+	mutex_lock(&hisi_ptt->tune_lock);
+
+	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
+	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
+	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
+			  desc->event_code);
+	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
+
+	/* Write all 1 to indicates it's the read process */
+	writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
+
+	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
+		mutex_unlock(&hisi_ptt->tune_lock);
+		return -ETIMEDOUT;
+	}
+
+	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
+	reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
+	val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);
+
+	mutex_unlock(&hisi_ptt->tune_lock);
+	return sysfs_emit(buf, "%u\n", val);
+}
+
+static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
+	struct dev_ext_attribute *ext_attr;
+	struct hisi_ptt_tune_desc *desc;
+	u32 reg;
+	u16 val;
+
+	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
+	desc = ext_attr->var;
+
+	if (kstrtou16(buf, 10, &val))
+		return -EINVAL;
+
+	mutex_lock(&hisi_ptt->tune_lock);
+
+	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
+	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
+	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
+			  desc->event_code);
+	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
+	writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
+	       hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
+
+	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
+		mutex_unlock(&hisi_ptt->tune_lock);
+		return -ETIMEDOUT;
+	}
+
+	mutex_unlock(&hisi_ptt->tune_lock);
+	return count;
+}
+
+#define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store)			\
+	static struct hisi_ptt_tune_desc _name##_desc = {		\
+		.name = #_name,						\
+		.event_code = (_val),					\
+	};								\
+	static struct dev_ext_attribute hisi_ptt_##_name##_attr = {	\
+		.attr	= __ATTR(_name, 0600, _show, _store),		\
+		.var	= &_name##_desc,				\
+	}
+
+#define HISI_PTT_TUNE_ATTR_COMMON(_name, _val)		\
+	HISI_PTT_TUNE_ATTR(_name, _val,			\
+			   hisi_ptt_tune_attr_show,	\
+			   hisi_ptt_tune_attr_store)
+
+/*
+ * The value of the tuning event are composed of two parts: main event code
+ * in BIT[0,15] and subevent code in BIT[16,23]. For example, qox_tx_cpl is
+ * a subevent of 'Tx path QoS control' which for tuning the weight of Tx
+ * completion TLPs. See hisi_ptt.rst documentation for more information.
+ */
+#define HISI_PTT_TUNE_QOS_TX_CPL		(0x4 | (3 << 16))
+#define HISI_PTT_TUNE_QOS_TX_NP			(0x4 | (4 << 16))
+#define HISI_PTT_TUNE_QOS_TX_P			(0x4 | (5 << 16))
+#define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL	(0x5 | (6 << 16))
+#define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL	(0x5 | (7 << 16))
+
+HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
+HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
+HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
+HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
+HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);
+
+static struct attribute *hisi_ptt_tune_attrs[] = {
+	&hisi_ptt_qos_tx_cpl_attr.attr.attr,
+	&hisi_ptt_qos_tx_np_attr.attr.attr,
+	&hisi_ptt_qos_tx_p_attr.attr.attr,
+	&hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
+	&hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
+	NULL,
+};
+
+static struct attribute_group hisi_ptt_tune_group = {
+	.name	= "tune",
+	.attrs	= hisi_ptt_tune_attrs,
+};
+
+static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
+{
+	if (is_port)
+		return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));
+
+	return devid;
+}
+
+static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
+{
+	u32 val;
+
+	return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
+					  val, val & HISI_PTT_TRACE_IDLE,
+					  HISI_PTT_WAIT_POLL_INTERVAL_US,
+					  HISI_PTT_WAIT_TRACE_TIMEOUT_US);
+}
+
+static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
+{
+	u32 val;
+
+	readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
+				  val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
+				  HISI_PTT_RESET_TIMEOUT_US);
+}
+
+static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
+{
+	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+	hisi_ptt->trace_ctrl.started = false;
+}
+
+static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
+{
+	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+	u32 val;
+	int i;
+
+	/* Check device idle before start trace */
+	if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
+		pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
+		return -EBUSY;
+	}
+
+	ctrl->started = true;
+
+	/* Reset the DMA before start tracing */
+	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+	val |= HISI_PTT_TRACE_CTRL_RST;
+	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+
+	hisi_ptt_wait_dma_reset_done(hisi_ptt);
+
+	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+	val &= ~HISI_PTT_TRACE_CTRL_RST;
+	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+
+	/* Reset the index of current buffer */
+	hisi_ptt->trace_ctrl.buf_index = 0;
+
+	/* Zero the trace buffers */
+	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
+		memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);
+
+	/* Clear the interrupt status */
+	writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
+	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);
+
+	/* Set the trace control register */
+	val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
+	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
+	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
+	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
+	if (!hisi_ptt->trace_ctrl.is_port)
+		val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;
+
+	/* Start the Trace */
+	val |= HISI_PTT_TRACE_CTRL_EN;
+	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+
+	return 0;
+}
+
+static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
+{
+	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+	struct perf_output_handle *handle = &ctrl->handle;
+	struct perf_event *event = handle->event;
+	struct hisi_ptt_pmu_buf *buf;
+	size_t size;
+	void *addr;
+
+	buf = perf_get_aux(handle);
+	if (!buf || !handle->size)
+		return -EINVAL;
+
+	addr = ctrl->trace_buf[ctrl->buf_index].addr;
+
+	/*
+	 * If we're going to stop, read the size of already traced data from
+	 * HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the interrupt,
+	 * the data size is always HISI_PTT_TRACE_BUF_SIZE.
+	 */
+	if (stop) {
+		u32 reg;
+
+		reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
+		size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
+	} else {
+		size = HISI_PTT_TRACE_BUF_SIZE;
+	}
+
+	memcpy(buf->base + buf->pos, addr, size);
+	buf->pos += size;
+
+	/*
+	 * Just commit the traced data if we're going to stop. Otherwise if the
+	 * resident AUX buffer cannot contain the data of next trace buffer,
+	 * apply a new one.
+	 */
+	if (stop) {
+		perf_aux_output_end(handle, buf->pos);
+	} else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
+		perf_aux_output_end(handle, buf->pos);
+
+		buf = perf_aux_output_begin(handle, event);
+		if (!buf)
+			return -EINVAL;
+
+		buf->pos = handle->head % buf->length;
+		if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
+			perf_aux_output_end(handle, 0);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static irqreturn_t hisi_ptt_isr(int irq, void *context)
+{
+	struct hisi_ptt *hisi_ptt = context;
+	u32 status, buf_idx;
+
+	status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
+	if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
+		return IRQ_NONE;
+
+	buf_idx = ffs(status) - 1;
+
+	/* Clear the interrupt status of buffer @buf_idx */
+	writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
+
+	/*
+	 * Update the AUX buffer and cache the current buffer index,
+	 * as we need to know this and save the data when the trace
+	 * is ended out of the interrupt handler. End the trace
+	 * if the updating fails.
+	 */
+	if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
+		hisi_ptt_trace_end(hisi_ptt);
+	else
+		hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;
+
+	return IRQ_HANDLED;
+}
+
+static void hisi_ptt_irq_free_vectors(void *pdev)
+{
+	pci_free_irq_vectors(pdev);
+}
+
+static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
+{
+	struct pci_dev *pdev = hisi_ptt->pdev;
+	int ret;
+
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+	if (ret < 0) {
+		pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
+	if (ret < 0)
+		return ret;
+
+	hisi_ptt->trace_irq = pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ);
+	ret = devm_request_irq(&pdev->dev, hisi_ptt->trace_irq, hisi_ptt_isr,
+				IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME,
+				hisi_ptt);
+	if (ret) {
+		pci_err(pdev, "failed to request irq %d, ret = %d\n",
+			hisi_ptt->trace_irq, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void hisi_ptt_del_free_filter(struct hisi_ptt *hisi_ptt,
+				      struct hisi_ptt_filter_desc *filter)
+{
+	if (filter->is_port)
+		hisi_ptt->port_mask &= ~hisi_ptt_get_filter_val(filter->devid, true);
+
+	list_del(&filter->list);
+	kfree(filter->name);
+	kfree(filter);
+}
+
+static struct hisi_ptt_filter_desc *
+hisi_ptt_alloc_add_filter(struct hisi_ptt *hisi_ptt, u16 devid, bool is_port)
+{
+	struct hisi_ptt_filter_desc *filter;
+	u8 devfn = devid & 0xff;
+	char *filter_name;
+
+	filter_name = kasprintf(GFP_KERNEL, "%04x:%02x:%02x.%d", pci_domain_nr(hisi_ptt->pdev->bus),
+				 PCI_BUS_NUM(devid), PCI_SLOT(devfn), PCI_FUNC(devfn));
+	if (!filter_name) {
+		pci_err(hisi_ptt->pdev, "failed to allocate name for filter %04x:%02x:%02x.%d\n",
+			pci_domain_nr(hisi_ptt->pdev->bus), PCI_BUS_NUM(devid),
+			PCI_SLOT(devfn), PCI_FUNC(devfn));
+		return NULL;
+	}
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter) {
+		pci_err(hisi_ptt->pdev, "failed to add filter for %s\n",
+			filter_name);
+		kfree(filter_name);
+		return NULL;
+	}
+
+	filter->name = filter_name;
+	filter->is_port = is_port;
+	filter->devid = devid;
+
+	if (filter->is_port) {
+		list_add_tail(&filter->list, &hisi_ptt->port_filters);
+
+		/* Update the available port mask */
+		hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
+	} else {
+		list_add_tail(&filter->list, &hisi_ptt->req_filters);
+	}
+
+	return filter;
+}
+
+static ssize_t hisi_ptt_filter_show(struct device *dev, struct device_attribute *attr,
+				    char *buf)
+{
+	struct hisi_ptt_filter_desc *filter;
+	unsigned long filter_val;
+
+	filter = container_of(attr, struct hisi_ptt_filter_desc, attr);
+	filter_val = hisi_ptt_get_filter_val(filter->devid, filter->is_port) |
+		     (filter->is_port ? HISI_PTT_PMU_FILTER_IS_PORT : 0);
+
+	return sysfs_emit(buf, "0x%05lx\n", filter_val);
+}
+
+static int hisi_ptt_create_rp_filter_attr(struct hisi_ptt *hisi_ptt,
+					  struct hisi_ptt_filter_desc *filter)
+{
+	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
+
+	sysfs_attr_init(&filter->attr.attr);
+	filter->attr.attr.name = filter->name;
+	filter->attr.attr.mode = 0400; /* DEVICE_ATTR_ADMIN_RO */
+	filter->attr.show = hisi_ptt_filter_show;
+
+	return sysfs_add_file_to_group(kobj, &filter->attr.attr,
+				       HISI_PTT_RP_FILTERS_GRP_NAME);
+}
+
+static void hisi_ptt_remove_rp_filter_attr(struct hisi_ptt *hisi_ptt,
+					  struct hisi_ptt_filter_desc *filter)
+{
+	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
+
+	sysfs_remove_file_from_group(kobj, &filter->attr.attr,
+				     HISI_PTT_RP_FILTERS_GRP_NAME);
+}
+
+static int hisi_ptt_create_req_filter_attr(struct hisi_ptt *hisi_ptt,
+					   struct hisi_ptt_filter_desc *filter)
+{
+	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
+
+	sysfs_attr_init(&filter->attr.attr);
+	filter->attr.attr.name = filter->name;
+	filter->attr.attr.mode = 0400; /* DEVICE_ATTR_ADMIN_RO */
+	filter->attr.show = hisi_ptt_filter_show;
+
+	return sysfs_add_file_to_group(kobj, &filter->attr.attr,
+				       HISI_PTT_REQ_FILTERS_GRP_NAME);
+}
+
+static void hisi_ptt_remove_req_filter_attr(struct hisi_ptt *hisi_ptt,
+					   struct hisi_ptt_filter_desc *filter)
+{
+	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
+
+	sysfs_remove_file_from_group(kobj, &filter->attr.attr,
+				     HISI_PTT_REQ_FILTERS_GRP_NAME);
+}
+
+static int hisi_ptt_create_filter_attr(struct hisi_ptt *hisi_ptt,
+				       struct hisi_ptt_filter_desc *filter)
+{
+	int ret;
+
+	if (filter->is_port)
+		ret = hisi_ptt_create_rp_filter_attr(hisi_ptt, filter);
+	else
+		ret = hisi_ptt_create_req_filter_attr(hisi_ptt, filter);
+
+	if (ret)
+		pci_err(hisi_ptt->pdev, "failed to create sysfs attribute for filter %s\n",
+			filter->name);
+
+	return ret;
+}
+
+static void hisi_ptt_remove_filter_attr(struct hisi_ptt *hisi_ptt,
+					struct hisi_ptt_filter_desc *filter)
+{
+	if (filter->is_port)
+		hisi_ptt_remove_rp_filter_attr(hisi_ptt, filter);
+	else
+		hisi_ptt_remove_req_filter_attr(hisi_ptt, filter);
+}
+
+static void hisi_ptt_remove_all_filter_attributes(void *data)
+{
+	struct hisi_ptt_filter_desc *filter;
+	struct hisi_ptt *hisi_ptt = data;
+
+	mutex_lock(&hisi_ptt->filter_lock);
+
+	list_for_each_entry(filter, &hisi_ptt->req_filters, list)
+		hisi_ptt_remove_filter_attr(hisi_ptt, filter);
+
+	list_for_each_entry(filter, &hisi_ptt->port_filters, list)
+		hisi_ptt_remove_filter_attr(hisi_ptt, filter);
+
+	hisi_ptt->sysfs_inited = false;
+	mutex_unlock(&hisi_ptt->filter_lock);
+}
+
+static int hisi_ptt_init_filter_attributes(struct hisi_ptt *hisi_ptt)
+{
+	struct hisi_ptt_filter_desc *filter;
+	int ret;
+
+	mutex_lock(&hisi_ptt->filter_lock);
+
+	/*
+	 * Register the reset callback in the first stage. In reset we traverse
+	 * the filters list to remove the sysfs attributes so the callback can
+	 * be called safely even without below filter attributes creation.
+	 */
+	ret = devm_add_action(&hisi_ptt->pdev->dev,
+			      hisi_ptt_remove_all_filter_attributes,
+			      hisi_ptt);
+	if (ret)
+		goto out;
+
+	list_for_each_entry(filter, &hisi_ptt->port_filters, list) {
+		ret = hisi_ptt_create_filter_attr(hisi_ptt, filter);
+		if (ret)
+			goto out;
+	}
+
+	list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
+		ret = hisi_ptt_create_filter_attr(hisi_ptt, filter);
+		if (ret)
+			goto out;
+	}
+
+	hisi_ptt->sysfs_inited = true;
+out:
+	mutex_unlock(&hisi_ptt->filter_lock);
+	return ret;
+}
+
+static void hisi_ptt_update_filters(struct work_struct *work)
+{
+	struct delayed_work *delayed_work = to_delayed_work(work);
+	struct hisi_ptt_filter_update_info info;
+	struct hisi_ptt_filter_desc *filter;
+	struct hisi_ptt *hisi_ptt;
+
+	hisi_ptt = container_of(delayed_work, struct hisi_ptt, work);
+
+	if (!mutex_trylock(&hisi_ptt->filter_lock)) {
+		schedule_delayed_work(&hisi_ptt->work, HISI_PTT_WORK_DELAY_MS);
+		return;
+	}
+
+	while (kfifo_get(&hisi_ptt->filter_update_kfifo, &info)) {
+		if (info.is_add) {
+			/*
+			 * Notify the users if failed to add this filter, others
+			 * still work and available. See the comments in
+			 * hisi_ptt_init_filters().
+			 */
+			filter = hisi_ptt_alloc_add_filter(hisi_ptt, info.devid, info.is_port);
+			if (!filter)
+				continue;
+
+			/*
+			 * If filters' sysfs entries hasn't been initialized,
+			 * then we're still at probe stage. Add the filters to
+			 * the list and later hisi_ptt_init_filter_attributes()
+			 * will create sysfs attributes for all the filters.
+			 */
+			if (hisi_ptt->sysfs_inited &&
+			    hisi_ptt_create_filter_attr(hisi_ptt, filter)) {
+				hisi_ptt_del_free_filter(hisi_ptt, filter);
+				continue;
+			}
+		} else {
+			struct hisi_ptt_filter_desc *tmp;
+			struct list_head *target_list;
+
+			target_list = info.is_port ? &hisi_ptt->port_filters :
+				      &hisi_ptt->req_filters;
+
+			list_for_each_entry_safe(filter, tmp, target_list, list)
+				if (filter->devid == info.devid) {
+					if (hisi_ptt->sysfs_inited)
+						hisi_ptt_remove_filter_attr(hisi_ptt, filter);
+
+					hisi_ptt_del_free_filter(hisi_ptt, filter);
+					break;
+				}
+		}
+	}
+
+	mutex_unlock(&hisi_ptt->filter_lock);
+}
+
+/*
+ * A PCI bus notifier is used here for dynamically updating the filter
+ * list.
+ */
+static int hisi_ptt_notifier_call(struct notifier_block *nb, unsigned long action,
+				  void *data)
+{
+	struct hisi_ptt *hisi_ptt = container_of(nb, struct hisi_ptt, hisi_ptt_nb);
+	struct hisi_ptt_filter_update_info info;
+	struct pci_dev *pdev, *root_port;
+	struct device *dev = data;
+	u32 port_devid;
+
+	pdev = to_pci_dev(dev);
+	root_port = pcie_find_root_port(pdev);
+	if (!root_port)
+		return 0;
+
+	port_devid = pci_dev_id(root_port);
+	if (port_devid < hisi_ptt->lower_bdf ||
+	    port_devid > hisi_ptt->upper_bdf)
+		return 0;
+
+	info.is_port = pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT;
+	info.devid = pci_dev_id(pdev);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		info.is_add = true;
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		info.is_add = false;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * The FIFO size is 16 which is sufficient for almost all the cases,
+	 * since each PCIe core will have most 8 Root Ports (typically only
+	 * 1~4 Root Ports). On failure log the failed filter and let user
+	 * handle it.
+	 */
+	if (kfifo_in_spinlocked(&hisi_ptt->filter_update_kfifo, &info, 1,
+				&hisi_ptt->filter_update_lock))
+		schedule_delayed_work(&hisi_ptt->work, 0);
+	else
+		pci_warn(hisi_ptt->pdev,
+			 "filter update fifo overflow for target %s\n",
+			 pci_name(pdev));
+
+	return 0;
+}
+
+static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
+{
+	struct pci_dev *root_port = pcie_find_root_port(pdev);
+	struct hisi_ptt_filter_desc *filter;
+	struct hisi_ptt *hisi_ptt = data;
+	u32 port_devid;
+
+	if (!root_port)
+		return 0;
+
+	port_devid = pci_dev_id(root_port);
+	if (port_devid < hisi_ptt->lower_bdf ||
+	    port_devid > hisi_ptt->upper_bdf)
+		return 0;
+
+	/*
+	 * We won't fail the probe if filter allocation failed here. The filters
+	 * should be partial initialized and users would know which filter fails
+	 * through the log. Other functions of PTT device are still available.
+	 */
+	filter = hisi_ptt_alloc_add_filter(hisi_ptt, pci_dev_id(pdev),
+					    pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT);
+	if (!filter)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void hisi_ptt_release_filters(void *data)
+{
+	struct hisi_ptt_filter_desc *filter, *tmp;
+	struct hisi_ptt *hisi_ptt = data;
+
+	list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list)
+		hisi_ptt_del_free_filter(hisi_ptt, filter);
+
+	list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list)
+		hisi_ptt_del_free_filter(hisi_ptt, filter);
+}
+
+static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
+{
+	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+	struct device *dev = &hisi_ptt->pdev->dev;
+	int i;
+
+	ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
+				       sizeof(*ctrl->trace_buf), GFP_KERNEL);
+	if (!ctrl->trace_buf)
+		return -ENOMEM;
+
+	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
+		ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
+							     &ctrl->trace_buf[i].dma,
+							     GFP_KERNEL);
+		if (!ctrl->trace_buf[i].addr)
+			return -ENOMEM;
+	}
+
+	/* Configure the trace DMA buffer */
+	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
+		writel(lower_32_bits(ctrl->trace_buf[i].dma),
+		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
+		       i * HISI_PTT_TRACE_ADDR_STRIDE);
+		writel(upper_32_bits(ctrl->trace_buf[i].dma),
+		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
+		       i * HISI_PTT_TRACE_ADDR_STRIDE);
+	}
+	writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);
+
+	return 0;
+}
+
+static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
+{
+	struct pci_dev *pdev = hisi_ptt->pdev;
+	struct pci_bus *bus;
+	int ret;
+	u32 reg;
+
+	INIT_DELAYED_WORK(&hisi_ptt->work, hisi_ptt_update_filters);
+	INIT_KFIFO(hisi_ptt->filter_update_kfifo);
+	spin_lock_init(&hisi_ptt->filter_update_lock);
+
+	INIT_LIST_HEAD(&hisi_ptt->port_filters);
+	INIT_LIST_HEAD(&hisi_ptt->req_filters);
+	mutex_init(&hisi_ptt->filter_lock);
+
+	ret = hisi_ptt_config_trace_buf(hisi_ptt);
+	if (ret)
+		return ret;
+
+	/*
+	 * The device range register provides the information about the root
+	 * ports which the RCiEP can control and trace. The RCiEP and the root
+	 * ports which it supports are on the same PCIe core, with same domain
+	 * number but maybe different bus number. The device range register
+	 * will tell us which root ports we can support, Bit[31:16] indicates
+	 * the upper BDF numbers of the root port, while Bit[15:0] indicates
+	 * the lower.
+	 */
+	reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
+	hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
+	hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);
+
+	bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
+	if (bus)
+		pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);
+
+	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
+	if (ret)
+		return ret;
+
+	hisi_ptt->trace_ctrl.on_cpu = -1;
+	return 0;
+}
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
+	const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *hisi_ptt_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_ptt_cpumask_attr_group = {
+	.attrs = hisi_ptt_cpumask_attrs,
+};
+
+/*
+ * Bit 19 indicates the filter type, 1 for Root Port filter and 0 for Requester
+ * filter. Bit[15:0] indicates the filter value, for Root Port filter it's
+ * a bit mask of desired ports and for Requester filter it's the Requester ID
+ * of the desired PCIe function. Bit[18:16] is reserved for extension.
+ *
+ * See hisi_ptt.rst documentation for detailed information.
+ */
+PMU_FORMAT_ATTR(filter,		"config:0-19");
+PMU_FORMAT_ATTR(direction,	"config:20-23");
+PMU_FORMAT_ATTR(type,		"config:24-31");
+PMU_FORMAT_ATTR(format,		"config:32-35");
+
+static struct attribute *hisi_ptt_pmu_format_attrs[] = {
+	&format_attr_filter.attr,
+	&format_attr_direction.attr,
+	&format_attr_type.attr,
+	&format_attr_format.attr,
+	NULL
+};
+
+static struct attribute_group hisi_ptt_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_ptt_pmu_format_attrs,
+};
+
+static ssize_t hisi_ptt_filter_multiselect_show(struct device *dev,
+						struct device_attribute *attr,
+						char *buf)
+{
+	struct dev_ext_attribute *ext_attr;
+
+	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "%s\n", (char *)ext_attr->var);
+}
+
+static struct dev_ext_attribute root_port_filters_multiselect = {
+	.attr = {
+		.attr = { .name = "multiselect", .mode = 0400 },
+		.show = hisi_ptt_filter_multiselect_show,
+	},
+	.var = "1",
+};
+
+static struct attribute *hisi_ptt_pmu_root_ports_attrs[] = {
+	&root_port_filters_multiselect.attr.attr,
+	NULL
+};
+
+static struct attribute_group hisi_ptt_pmu_root_ports_group = {
+	.name = HISI_PTT_RP_FILTERS_GRP_NAME,
+	.attrs = hisi_ptt_pmu_root_ports_attrs,
+};
+
+static struct dev_ext_attribute requester_filters_multiselect = {
+	.attr = {
+		.attr = { .name = "multiselect", .mode = 0400 },
+		.show = hisi_ptt_filter_multiselect_show,
+	},
+	.var = "0",
+};
+
+static struct attribute *hisi_ptt_pmu_requesters_attrs[] = {
+	&requester_filters_multiselect.attr.attr,
+	NULL
+};
+
+static struct attribute_group hisi_ptt_pmu_requesters_group = {
+	.name = HISI_PTT_REQ_FILTERS_GRP_NAME,
+	.attrs = hisi_ptt_pmu_requesters_attrs,
+};
+
+static const struct attribute_group *hisi_ptt_pmu_groups[] = {
+	&hisi_ptt_cpumask_attr_group,
+	&hisi_ptt_pmu_format_group,
+	&hisi_ptt_tune_group,
+	&hisi_ptt_pmu_root_ports_group,
+	&hisi_ptt_pmu_requesters_group,
+	NULL
+};
+
+static int hisi_ptt_trace_valid_direction(u32 val)
+{
+	/*
+	 * The direction values have different effects according to the data
+	 * format (specified in the parentheses). TLP set A/B means different
+	 * set of TLP types. See hisi_ptt.rst documentation for more details.
+	 */
+	static const u32 hisi_ptt_trace_available_direction[] = {
+		0,	/* inbound(4DW) or reserved(8DW) */
+		1,	/* outbound(4DW) */
+		2,	/* {in, out}bound(4DW) or inbound(8DW), TLP set A */
+		3,	/* {in, out}bound(4DW) or inbound(8DW), TLP set B */
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
+		if (val == hisi_ptt_trace_available_direction[i])
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int hisi_ptt_trace_valid_type(u32 val)
+{
+	/* Different types can be set simultaneously */
+	static const u32 hisi_ptt_trace_available_type[] = {
+		1,	/* posted_request */
+		2,	/* non-posted_request */
+		4,	/* completion */
+	};
+	int i;
+
+	if (!val)
+		return -EINVAL;
+
+	/*
+	 * Walk the available list and clear the valid bits of
+	 * the config. If there is any resident bit after the
+	 * walk then the config is invalid.
+	 */
+	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
+		val &= ~hisi_ptt_trace_available_type[i];
+
+	if (val)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int hisi_ptt_trace_valid_format(u32 val)
+{
+	static const u32 hisi_ptt_trace_availble_format[] = {
+		0,	/* 4DW */
+		1,	/* 8DW */
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_availble_format); i++) {
+		if (val == hisi_ptt_trace_availble_format[i])
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
+{
+	unsigned long val, port_mask = hisi_ptt->port_mask;
+	struct hisi_ptt_filter_desc *filter;
+	int ret = 0;
+
+	hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
+	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);
+
+	/*
+	 * Port filters are defined as bit mask. For port filters, check
+	 * the bits in the @val are within the range of hisi_ptt->port_mask
+	 * and whether it's empty or not, otherwise user has specified
+	 * some unsupported root ports.
+	 *
+	 * For Requester ID filters, walk the available filter list to see
+	 * whether we have one matched.
+	 */
+	mutex_lock(&hisi_ptt->filter_lock);
+	if (!hisi_ptt->trace_ctrl.is_port) {
+		list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
+			if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
+				goto out;
+		}
+	} else if (bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
+		goto out;
+	}
+
+	ret = -EINVAL;
+out:
+	mutex_unlock(&hisi_ptt->filter_lock);
+	return ret;
+}
+
+static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
+{
+	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+	u32 val;
+
+	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
+	hisi_ptt->trace_ctrl.filter = val;
+
+	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
+	ctrl->direction = val;
+
+	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
+	ctrl->type = val;
+
+	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
+	ctrl->format = val;
+}
+
+static int hisi_ptt_pmu_event_init(struct perf_event *event)
+{
+	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
+	int ret;
+	u32 val;
+
+	if (event->cpu < 0) {
+		dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
+		return -ENOENT;
+
+	ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
+	if (ret < 0)
+		return ret;
+
+	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
+	ret = hisi_ptt_trace_valid_direction(val);
+	if (ret < 0)
+		return ret;
+
+	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
+	ret = hisi_ptt_trace_valid_type(val);
+	if (ret < 0)
+		return ret;
+
+	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
+	return hisi_ptt_trace_valid_format(val);
+}
+
+static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
+				    int nr_pages, bool overwrite)
+{
+	struct hisi_ptt_pmu_buf *buf;
+	struct page **pagelist;
+	int i;
+
+	if (overwrite) {
+		dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
+		return NULL;
+	}
+
+	/* If the pages size less than buffers, we cannot start trace */
+	if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
+		return NULL;
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	pagelist = kcalloc(nr_pages, sizeof(*pagelist), GFP_KERNEL);
+	if (!pagelist)
+		goto err;
+
+	for (i = 0; i < nr_pages; i++)
+		pagelist[i] = virt_to_page(pages[i]);
+
+	buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!buf->base) {
+		kfree(pagelist);
+		goto err;
+	}
+
+	buf->nr_pages = nr_pages;
+	buf->length = nr_pages * PAGE_SIZE;
+	buf->pos = 0;
+
+	kfree(pagelist);
+	return buf;
+err:
+	kfree(buf);
+	return NULL;
+}
+
+static void hisi_ptt_pmu_free_aux(void *aux)
+{
+	struct hisi_ptt_pmu_buf *buf = aux;
+
+	vunmap(buf->base);
+	kfree(buf);
+}
+
+static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
+{
+	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
+	struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
+	struct hw_perf_event *hwc = &event->hw;
+	struct device *dev = event->pmu->dev;
+	struct hisi_ptt_pmu_buf *buf;
+	int cpu = event->cpu;
+	int ret;
+
+	hwc->state = 0;
+
+	/* Serialize the perf process if user specified several CPUs */
+	spin_lock(&hisi_ptt->pmu_lock);
+	if (hisi_ptt->trace_ctrl.started) {
+		dev_dbg(dev, "trace has already started\n");
+		goto stop;
+	}
+
+	/*
+	 * Handle the interrupt on the same cpu which starts the trace to avoid
+	 * context mismatch. Otherwise we'll trigger the WARN from the perf
+	 * core in event_function_local(). If CPU passed is offline we'll fail
+	 * here, just log it since we can do nothing here.
+	 */
+	ret = irq_set_affinity(hisi_ptt->trace_irq, cpumask_of(cpu));
+	if (ret)
+		dev_warn(dev, "failed to set the affinity of trace interrupt\n");
+
+	hisi_ptt->trace_ctrl.on_cpu = cpu;
+
+	buf = perf_aux_output_begin(handle, event);
+	if (!buf) {
+		dev_dbg(dev, "aux output begin failed\n");
+		goto stop;
+	}
+
+	buf->pos = handle->head % buf->length;
+
+	hisi_ptt_pmu_init_configs(hisi_ptt, event);
+
+	ret = hisi_ptt_trace_start(hisi_ptt);
+	if (ret) {
+		dev_dbg(dev, "trace start failed, ret = %d\n", ret);
+		perf_aux_output_end(handle, 0);
+		goto stop;
+	}
+
+	spin_unlock(&hisi_ptt->pmu_lock);
+	return;
+stop:
+	event->hw.state |= PERF_HES_STOPPED;
+	spin_unlock(&hisi_ptt->pmu_lock);
+}
+
+static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	spin_lock(&hisi_ptt->pmu_lock);
+	if (hisi_ptt->trace_ctrl.started) {
+		hisi_ptt_trace_end(hisi_ptt);
+
+		if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
+			dev_warn(event->pmu->dev, "Device is still busy\n");
+
+		hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
+	}
+	spin_unlock(&hisi_ptt->pmu_lock);
+
+	hwc->state |= PERF_HES_STOPPED;
+	perf_event_update_userpage(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
+{
+	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int cpu = event->cpu;
+
+	/* Only allow the cpus on the device's node to add the event */
+	if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
+		return 0;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START) {
+		hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
+		if (hwc->state & PERF_HES_STOPPED)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
+{
+	hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
+}
+
+static void hisi_ptt_pmu_read(struct perf_event *event)
+{
+}
+
+static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
+{
+	cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
+}
+
+static void hisi_ptt_unregister_pmu(void *pmu)
+{
+	perf_pmu_unregister(pmu);
+}
+
+static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
+{
+	u16 core_id, sicl_id;
+	char *pmu_name;
+	u32 reg;
+	int ret;
+
+	ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
+					       &hisi_ptt->hotplug_node);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
+				       hisi_ptt_remove_cpuhp_instance,
+				       &hisi_ptt->hotplug_node);
+	if (ret)
+		return ret;
+
+	mutex_init(&hisi_ptt->tune_lock);
+	spin_lock_init(&hisi_ptt->pmu_lock);
+
+	hisi_ptt->hisi_ptt_pmu = (struct pmu) {
+		.module		= THIS_MODULE,
+		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_NO_EXCLUDE,
+		.task_ctx_nr	= perf_sw_context,
+		.attr_groups	= hisi_ptt_pmu_groups,
+		.event_init	= hisi_ptt_pmu_event_init,
+		.setup_aux	= hisi_ptt_pmu_setup_aux,
+		.free_aux	= hisi_ptt_pmu_free_aux,
+		.start		= hisi_ptt_pmu_start,
+		.stop		= hisi_ptt_pmu_stop,
+		.add		= hisi_ptt_pmu_add,
+		.del		= hisi_ptt_pmu_del,
+		.read		= hisi_ptt_pmu_read,
+	};
+
+	reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
+	core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
+	sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);
+
+	pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
+				  sicl_id, core_id);
+	if (!pmu_name)
+		return -ENOMEM;
+
+	ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
+					hisi_ptt_unregister_pmu,
+					&hisi_ptt->hisi_ptt_pmu);
+}
+
+static void hisi_ptt_unregister_filter_update_notifier(void *data)
+{
+	struct hisi_ptt *hisi_ptt = data;
+
+	bus_unregister_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
+
+	/* Cancel any work that has been queued */
+	cancel_delayed_work_sync(&hisi_ptt->work);
+}
+
+/* Register the bus notifier for dynamically updating the filter list */
+static int hisi_ptt_register_filter_update_notifier(struct hisi_ptt *hisi_ptt)
+{
+	int ret;
+
+	hisi_ptt->hisi_ptt_nb.notifier_call = hisi_ptt_notifier_call;
+	ret = bus_register_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
+					hisi_ptt_unregister_filter_update_notifier,
+					hisi_ptt);
+}
+
+/*
+ * The DMA of PTT trace can only use direct mappings due to some
+ * hardware restriction. Check whether there is no IOMMU or the
+ * policy of the IOMMU domain is passthrough, otherwise the trace
+ * cannot work.
+ *
+ * The PTT device is supposed to behind an ARM SMMUv3, which
+ * should have passthrough the device by a quirk.
+ */
+static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
+{
+	struct iommu_domain *iommu_domain;
+
+	iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
+	if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
+		return 0;
+
+	return -EOPNOTSUPP;
+}
+
+static int hisi_ptt_probe(struct pci_dev *pdev,
+			  const struct pci_device_id *id)
+{
+	struct hisi_ptt *hisi_ptt;
+	int ret;
+
+	ret = hisi_ptt_check_iommu_mapping(pdev);
+	if (ret) {
+		pci_err(pdev, "requires direct DMA mappings\n");
+		return ret;
+	}
+
+	hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
+	if (!hisi_ptt)
+		return -ENOMEM;
+
+	hisi_ptt->pdev = pdev;
+	pci_set_drvdata(pdev, hisi_ptt);
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		pci_err(pdev, "failed to enable device, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
+	if (ret) {
+		pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
+		return ret;
+	}
+
+	hisi_ptt->iobase = pcim_iomap_table(pdev)[2];
+
+	ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret) {
+		pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
+		return ret;
+	}
+
+	pci_set_master(pdev);
+
+	ret = hisi_ptt_register_irq(hisi_ptt);
+	if (ret)
+		return ret;
+
+	ret = hisi_ptt_init_ctrls(hisi_ptt);
+	if (ret) {
+		pci_err(pdev, "failed to init controls, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = hisi_ptt_register_filter_update_notifier(hisi_ptt);
+	if (ret)
+		pci_warn(pdev, "failed to register filter update notifier, ret = %d", ret);
+
+	ret = hisi_ptt_register_pmu(hisi_ptt);
+	if (ret) {
+		pci_err(pdev, "failed to register PMU device, ret = %d", ret);
+		return ret;
+	}
+
+	ret = hisi_ptt_init_filter_attributes(hisi_ptt);
+	if (ret) {
+		pci_err(pdev, "failed to init sysfs filter attributes, ret = %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct pci_device_id hisi_ptt_id_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);
+
+static struct pci_driver hisi_ptt_driver = {
+	.name = DRV_NAME,
+	.id_table = hisi_ptt_id_tbl,
+	.probe = hisi_ptt_probe,
+};
+
+static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+	struct hisi_ptt *hisi_ptt;
+	struct device *dev;
+	int target, src;
+
+	hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
+	src = hisi_ptt->trace_ctrl.on_cpu;
+	dev = hisi_ptt->hisi_ptt_pmu.dev;
+
+	if (!hisi_ptt->trace_ctrl.started || src != cpu)
+		return 0;
+
+	target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
+	if (target >= nr_cpu_ids) {
+		dev_err(dev, "no available cpu for perf context migration\n");
+		return 0;
+	}
+
+	perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);
+
+	/*
+	 * Also make sure the interrupt bind to the migrated CPU as well. Warn
+	 * the user on failure here.
+	 */
+	if (irq_set_affinity(hisi_ptt->trace_irq, cpumask_of(target)))
+		dev_warn(dev, "failed to set the affinity of trace interrupt\n");
+
+	hisi_ptt->trace_ctrl.on_cpu = target;
+	return 0;
+}
+
+static int __init hisi_ptt_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
+				      hisi_ptt_cpu_teardown);
+	if (ret < 0)
+		return ret;
+	hisi_ptt_pmu_online = ret;
+
+	ret = pci_register_driver(&hisi_ptt_driver);
+	if (ret)
+		cpuhp_remove_multi_state(hisi_ptt_pmu_online);
+
+	return ret;
+}
+module_init(hisi_ptt_init);
+
+static void __exit hisi_ptt_exit(void)
+{
+	pci_unregister_driver(&hisi_ptt_driver);
+	cpuhp_remove_multi_state(hisi_ptt_pmu_online);
+}
+module_exit(hisi_ptt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
+MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");
diff --git a/drivers/hwtracing/ptt/hisi_ptt.h b/drivers/hwtracing/ptt/hisi_ptt.h
new file mode 100644
index 0000000000..e17f045d7e
--- /dev/null
+++ b/drivers/hwtracing/ptt/hisi_ptt.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for HiSilicon PCIe tune and trace device
+ *
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ * Author: Yicong Yang <yangyicong@hisilicon.com>
+ */
+
+#ifndef _HISI_PTT_H
+#define _HISI_PTT_H
+
+#include <linux/bits.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/kfifo.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#define DRV_NAME "hisi_ptt"
+
+/*
+ * The definition of the device registers and register fields.
+ */
+#define HISI_PTT_TUNING_CTRL		0x0000
+#define   HISI_PTT_TUNING_CTRL_CODE	GENMASK(15, 0)
+#define   HISI_PTT_TUNING_CTRL_SUB	GENMASK(23, 16)
+#define HISI_PTT_TUNING_DATA		0x0004
+#define   HISI_PTT_TUNING_DATA_VAL_MASK	GENMASK(15, 0)
+#define HISI_PTT_TRACE_ADDR_SIZE	0x0800
+#define HISI_PTT_TRACE_ADDR_BASE_LO_0	0x0810
+#define HISI_PTT_TRACE_ADDR_BASE_HI_0	0x0814
+#define HISI_PTT_TRACE_ADDR_STRIDE	0x8
+#define HISI_PTT_TRACE_CTRL		0x0850
+#define   HISI_PTT_TRACE_CTRL_EN	BIT(0)
+#define   HISI_PTT_TRACE_CTRL_RST	BIT(1)
+#define   HISI_PTT_TRACE_CTRL_RXTX_SEL	GENMASK(3, 2)
+#define   HISI_PTT_TRACE_CTRL_TYPE_SEL	GENMASK(7, 4)
+#define   HISI_PTT_TRACE_CTRL_DATA_FORMAT	BIT(14)
+#define   HISI_PTT_TRACE_CTRL_FILTER_MODE	BIT(15)
+#define   HISI_PTT_TRACE_CTRL_TARGET_SEL	GENMASK(31, 16)
+#define HISI_PTT_TRACE_INT_STAT		0x0890
+#define   HISI_PTT_TRACE_INT_STAT_MASK	GENMASK(3, 0)
+#define HISI_PTT_TRACE_INT_MASK		0x0894
+#define HISI_PTT_TUNING_INT_STAT	0x0898
+#define   HISI_PTT_TUNING_INT_STAT_MASK	BIT(0)
+#define HISI_PTT_TRACE_WR_STS		0x08a0
+#define   HISI_PTT_TRACE_WR_STS_WRITE	GENMASK(27, 0)
+#define   HISI_PTT_TRACE_WR_STS_BUFFER	GENMASK(29, 28)
+#define HISI_PTT_TRACE_STS		0x08b0
+#define   HISI_PTT_TRACE_IDLE		BIT(0)
+#define HISI_PTT_DEVICE_RANGE		0x0fe0
+#define   HISI_PTT_DEVICE_RANGE_UPPER	GENMASK(31, 16)
+#define   HISI_PTT_DEVICE_RANGE_LOWER	GENMASK(15, 0)
+#define HISI_PTT_LOCATION		0x0fe8
+#define   HISI_PTT_CORE_ID		GENMASK(15, 0)
+#define   HISI_PTT_SICL_ID		GENMASK(31, 16)
+
+/* Parameters of PTT trace DMA part. */
+#define HISI_PTT_TRACE_DMA_IRQ			0
+#define HISI_PTT_TRACE_BUF_CNT			4
+#define HISI_PTT_TRACE_BUF_SIZE			SZ_4M
+#define HISI_PTT_TRACE_TOTAL_BUF_SIZE		(HISI_PTT_TRACE_BUF_SIZE * \
+						 HISI_PTT_TRACE_BUF_CNT)
+/* Wait time for hardware DMA to reset */
+#define HISI_PTT_RESET_TIMEOUT_US	10UL
+#define HISI_PTT_RESET_POLL_INTERVAL_US	1UL
+/* Poll timeout and interval for waiting hardware work to finish */
+#define HISI_PTT_WAIT_TUNE_TIMEOUT_US	1000000UL
+#define HISI_PTT_WAIT_TRACE_TIMEOUT_US	100UL
+#define HISI_PTT_WAIT_POLL_INTERVAL_US	10UL
+
+/* FIFO size for dynamically updating the PTT trace filter list. */
+#define HISI_PTT_FILTER_UPDATE_FIFO_SIZE	16
+/* Delay time for filter updating work */
+#define HISI_PTT_WORK_DELAY_MS			100UL
+
+#define HISI_PCIE_CORE_PORT_ID(devfn)	((PCI_SLOT(devfn) & 0x7) << 1)
+
+/* Definition of the PMU configs */
+#define HISI_PTT_PMU_FILTER_IS_PORT	BIT(19)
+#define HISI_PTT_PMU_FILTER_VAL_MASK	GENMASK(15, 0)
+#define HISI_PTT_PMU_DIRECTION_MASK	GENMASK(23, 20)
+#define HISI_PTT_PMU_TYPE_MASK		GENMASK(31, 24)
+#define HISI_PTT_PMU_FORMAT_MASK	GENMASK(35, 32)
+
+/**
+ * struct hisi_ptt_tune_desc - Describe tune event for PTT tune
+ * @hisi_ptt:   PTT device this tune event belongs to
+ * @name:       name of this event
+ * @event_code: code of the event
+ */
+struct hisi_ptt_tune_desc {
+	struct hisi_ptt *hisi_ptt;
+	const char *name;
+	u32 event_code;
+};
+
+/**
+ * struct hisi_ptt_dma_buffer - Describe a single trace buffer of PTT trace.
+ *                              The detail of the data format is described
+ *                              in the documentation of PTT device.
+ * @dma:   DMA address of this buffer visible to the device
+ * @addr:  virtual address of this buffer visible to the cpu
+ */
+struct hisi_ptt_dma_buffer {
+	dma_addr_t dma;
+	void *addr;
+};
+
+/**
+ * struct hisi_ptt_trace_ctrl - Control and status of PTT trace
+ * @trace_buf: array of the trace buffers for holding the trace data.
+ *             the length will be HISI_PTT_TRACE_BUF_CNT.
+ * @handle:    perf output handle of current trace session
+ * @buf_index: the index of current using trace buffer
+ * @on_cpu:    current tracing cpu
+ * @started:   current trace status, true for started
+ * @is_port:   whether we're tracing root port or not
+ * @direction: direction of the TLP headers to trace
+ * @filter:    filter value for tracing the TLP headers
+ * @format:    format of the TLP headers to trace
+ * @type:      type of the TLP headers to trace
+ */
+struct hisi_ptt_trace_ctrl {
+	struct hisi_ptt_dma_buffer *trace_buf;
+	struct perf_output_handle handle;
+	u32 buf_index;
+	int on_cpu;
+	bool started;
+	bool is_port;
+	u32 direction:2;
+	u32 filter:16;
+	u32 format:1;
+	u32 type:4;
+};
+
+/*
+ * sysfs attribute group name for root port filters and requester filters:
+ * /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters
+ * and
+ * /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters
+ */
+#define HISI_PTT_RP_FILTERS_GRP_NAME	"root_port_filters"
+#define HISI_PTT_REQ_FILTERS_GRP_NAME	"requester_filters"
+
+/**
+ * struct hisi_ptt_filter_desc - Descriptor of the PTT trace filter
+ * @attr:    sysfs attribute of this filter
+ * @list:    entry of this descriptor in the filter list
+ * @is_port: the PCI device of the filter is a Root Port or not
+ * @name:    name of this filter, same as the name of the related PCI device
+ * @devid:   the PCI device's devid of the filter
+ */
+struct hisi_ptt_filter_desc {
+	struct device_attribute attr;
+	struct list_head list;
+	bool is_port;
+	char *name;
+	u16 devid;
+};
+
+/**
+ * struct hisi_ptt_filter_update_info - Information for PTT filter updating
+ * @is_port:    the PCI device to update is a Root Port or not
+ * @is_add:     adding to the filter or not
+ * @devid:      the PCI device's devid of the filter
+ */
+struct hisi_ptt_filter_update_info {
+	bool is_port;
+	bool is_add;
+	u16 devid;
+};
+
+/**
+ * struct hisi_ptt_pmu_buf - Descriptor of the AUX buffer of PTT trace
+ * @length:   size of the AUX buffer
+ * @nr_pages: number of pages of the AUX buffer
+ * @base:     start address of AUX buffer
+ * @pos:      position in the AUX buffer to commit traced data
+ */
+struct hisi_ptt_pmu_buf {
+	size_t length;
+	int nr_pages;
+	void *base;
+	long pos;
+};
+
+/**
+ * struct hisi_ptt - Per PTT device data
+ * @trace_ctrl:   the control information of PTT trace
+ * @hisi_ptt_nb:  dynamic filter update notifier
+ * @hotplug_node: node for register cpu hotplug event
+ * @hisi_ptt_pmu: the pum device of trace
+ * @iobase:       base IO address of the device
+ * @pdev:         pci_dev of this PTT device
+ * @tune_lock:    lock to serialize the tune process
+ * @pmu_lock:     lock to serialize the perf process
+ * @trace_irq:    interrupt number used by trace
+ * @upper_bdf:    the upper BDF range of the PCI devices managed by this PTT device
+ * @lower_bdf:    the lower BDF range of the PCI devices managed by this PTT device
+ * @port_filters: the filter list of root ports
+ * @req_filters:  the filter list of requester ID
+ * @filter_lock:  lock to protect the filters
+ * @sysfs_inited: whether the filters' sysfs entries has been initialized
+ * @port_mask:    port mask of the managed root ports
+ * @work:         delayed work for filter updating
+ * @filter_update_lock: spinlock to protect the filter update fifo
+ * @filter_update_fifo: fifo of the filters waiting to update the filter list
+ */
+struct hisi_ptt {
+	struct hisi_ptt_trace_ctrl trace_ctrl;
+	struct notifier_block hisi_ptt_nb;
+	struct hlist_node hotplug_node;
+	struct pmu hisi_ptt_pmu;
+	void __iomem *iobase;
+	struct pci_dev *pdev;
+	struct mutex tune_lock;
+	spinlock_t pmu_lock;
+	int trace_irq;
+	u32 upper_bdf;
+	u32 lower_bdf;
+
+	/*
+	 * The trace TLP headers can either be filtered by certain
+	 * root port, or by the requester ID. Organize the filters
+	 * by @port_filters and @req_filters here. The mask of all
+	 * the valid ports is also cached for doing sanity check
+	 * of user input.
+	 */
+	struct list_head port_filters;
+	struct list_head req_filters;
+	struct mutex filter_lock;
+	bool sysfs_inited;
+	u16 port_mask;
+
+	/*
+	 * We use a delayed work here to avoid indefinitely waiting for
+	 * the hisi_ptt->mutex which protecting the filter list. The
+	 * work will be delayed only if the mutex can not be held,
+	 * otherwise no delay will be applied.
+	 */
+	struct delayed_work work;
+	spinlock_t filter_update_lock;
+	DECLARE_KFIFO(filter_update_kfifo, struct hisi_ptt_filter_update_info,
+		      HISI_PTT_FILTER_UPDATE_FIFO_SIZE);
+};
+
+#define to_hisi_ptt(pmu) container_of(pmu, struct hisi_ptt, hisi_ptt_pmu)
+
+#endif /* _HISI_PTT_H */
diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig
new file mode 100644
index 0000000000..eda6b11d40
--- /dev/null
+++ b/drivers/hwtracing/stm/Kconfig
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config STM
+	tristate "System Trace Module devices"
+	select CONFIGFS_FS
+	help
+	  A System Trace Module (STM) is a device exporting data in System
+	  Trace Protocol (STP) format as defined by MIPI STP standards.
+	  Examples of such devices are Intel(R) Trace Hub and Coresight STM.
+
+	  Say Y here to enable System Trace Module device support.
+
+if STM
+
+config STM_PROTO_BASIC
+	tristate "Basic STM framing protocol driver"
+	default CONFIG_STM
+	help
+	  This is a simple framing protocol for sending data over STM
+	  devices. This was the protocol that the STM framework used
+	  exclusively until the MIPI SyS-T support was added. Use this
+	  driver for compatibility with your existing STM setup.
+
+	  The receiving side only needs to be able to decode the MIPI
+	  STP protocol in order to extract the data.
+
+	  If you want to be able to use the basic protocol or want the
+	  backwards compatibility for your existing setup, say Y.
+
+config STM_PROTO_SYS_T
+	tristate "MIPI SyS-T STM framing protocol driver"
+	default CONFIG_STM
+	help
+	  This is an implementation of MIPI SyS-T protocol to be used
+	  over the STP transport. In addition to the data payload, it
+	  also carries additional metadata for time correlation, better
+	  means of trace source identification, etc.
+
+	  The receiving side must be able to decode this protocol in
+	  addition to the MIPI STP, in order to extract the data.
+
+	  If you don't know what this is, say N.
+
+config STM_DUMMY
+	tristate "Dummy STM driver"
+	help
+	  This is a simple dummy device that pretends to be an stm device
+	  and discards your data. Use for stm class testing.
+
+	  If you don't know what this is, say N.
+
+config STM_SOURCE_CONSOLE
+	tristate "Kernel console over STM devices"
+	help
+	  This is a kernel space trace source that sends kernel log
+	  messages to trace hosts over STM devices.
+
+	  If you want to send kernel console messages over STM devices,
+	  say Y.
+
+config STM_SOURCE_HEARTBEAT
+	tristate "Heartbeat over STM devices"
+	help
+	  This is a kernel space trace source that sends periodic
+	  heartbeat messages to trace hosts over STM devices. It is
+	  also useful for testing stm class drivers and the stm class
+	  framework itself.
+
+	  If you want to send heartbeat messages over STM devices,
+	  say Y.
+
+config STM_SOURCE_FTRACE
+	tristate "Copy the output from kernel Ftrace to STM engine"
+	depends on TRACING
+	help
+	  This option can be used to copy the output from kernel Ftrace
+	  to STM engine. Enabling this option will introduce a slight
+	  timing effect.
+
+	  If you want to send kernel Ftrace messages over STM devices,
+	  say Y.
+
+endif
diff --git a/drivers/hwtracing/stm/Makefile b/drivers/hwtracing/stm/Makefile
new file mode 100644
index 0000000000..1692fcd292
--- /dev/null
+++ b/drivers/hwtracing/stm/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_STM)	+= stm_core.o
+
+stm_core-y		:= core.o policy.o
+
+obj-$(CONFIG_STM_PROTO_BASIC) += stm_p_basic.o
+obj-$(CONFIG_STM_PROTO_SYS_T) += stm_p_sys-t.o
+
+stm_p_basic-y		:= p_basic.o
+stm_p_sys-t-y		:= p_sys-t.o
+
+obj-$(CONFIG_STM_DUMMY)	+= dummy_stm.o
+
+obj-$(CONFIG_STM_SOURCE_CONSOLE)	+= stm_console.o
+obj-$(CONFIG_STM_SOURCE_HEARTBEAT)	+= stm_heartbeat.o
+obj-$(CONFIG_STM_SOURCE_FTRACE)		+= stm_ftrace.o
+
+stm_console-y		:= console.o
+stm_heartbeat-y		:= heartbeat.o
+stm_ftrace-y		:= ftrace.o
diff --git a/drivers/hwtracing/stm/console.c b/drivers/hwtracing/stm/console.c
new file mode 100644
index 0000000000..a00f65e217
--- /dev/null
+++ b/drivers/hwtracing/stm/console.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple kernel console driver for STM devices
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * STM console will send kernel messages over STM devices to a trace host.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+
+static int stm_console_link(struct stm_source_data *data);
+static void stm_console_unlink(struct stm_source_data *data);
+
+static struct stm_console {
+	struct stm_source_data	data;
+	struct console		console;
+} stm_console = {
+	.data	= {
+		.name		= "console",
+		.nr_chans	= 1,
+		.link		= stm_console_link,
+		.unlink		= stm_console_unlink,
+	},
+};
+
+static void
+stm_console_write(struct console *con, const char *buf, unsigned len)
+{
+	struct stm_console *sc = container_of(con, struct stm_console, console);
+
+	stm_source_write(&sc->data, 0, buf, len);
+}
+
+static int stm_console_link(struct stm_source_data *data)
+{
+	struct stm_console *sc = container_of(data, struct stm_console, data);
+
+	strcpy(sc->console.name, "stm_console");
+	sc->console.write = stm_console_write;
+	sc->console.flags = CON_ENABLED | CON_PRINTBUFFER;
+	register_console(&sc->console);
+
+	return 0;
+}
+
+static void stm_console_unlink(struct stm_source_data *data)
+{
+	struct stm_console *sc = container_of(data, struct stm_console, data);
+
+	unregister_console(&sc->console);
+}
+
+static int stm_console_init(void)
+{
+	return stm_source_register_device(NULL, &stm_console.data);
+}
+
+static void stm_console_exit(void)
+{
+	stm_source_unregister_device(&stm_console.data);
+}
+
+module_init(stm_console_init);
+module_exit(stm_console_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("stm_console driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
new file mode 100644
index 0000000000..534fbefc7f
--- /dev/null
+++ b/drivers/hwtracing/stm/core.c
@@ -0,0 +1,1363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * System Trace Module (STM) infrastructure
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * STM class implements generic infrastructure for  System Trace Module devices
+ * as defined in MIPI STPv2 specification.
+ */
+
+#include <linux/pm_runtime.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/compat.h>
+#include <linux/kdev_t.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include "stm.h"
+
+#include <uapi/linux/stm.h>
+
+static unsigned int stm_core_up;
+
+/*
+ * The SRCU here makes sure that STM device doesn't disappear from under a
+ * stm_source_write() caller, which may want to have as little overhead as
+ * possible.
+ */
+static struct srcu_struct stm_source_srcu;
+
+static ssize_t masters_show(struct device *dev,
+			    struct device_attribute *attr,
+			    char *buf)
+{
+	struct stm_device *stm = to_stm_device(dev);
+	int ret;
+
+	ret = sprintf(buf, "%u %u\n", stm->data->sw_start, stm->data->sw_end);
+
+	return ret;
+}
+
+static DEVICE_ATTR_RO(masters);
+
+static ssize_t channels_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	struct stm_device *stm = to_stm_device(dev);
+	int ret;
+
+	ret = sprintf(buf, "%u\n", stm->data->sw_nchannels);
+
+	return ret;
+}
+
+static DEVICE_ATTR_RO(channels);
+
+static ssize_t hw_override_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct stm_device *stm = to_stm_device(dev);
+	int ret;
+
+	ret = sprintf(buf, "%u\n", stm->data->hw_override);
+
+	return ret;
+}
+
+static DEVICE_ATTR_RO(hw_override);
+
+static struct attribute *stm_attrs[] = {
+	&dev_attr_masters.attr,
+	&dev_attr_channels.attr,
+	&dev_attr_hw_override.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(stm);
+
+static struct class stm_class = {
+	.name		= "stm",
+	.dev_groups	= stm_groups,
+};
+
+/**
+ * stm_find_device() - find stm device by name
+ * @buf:	character buffer containing the name
+ *
+ * This is called when either policy gets assigned to an stm device or an
+ * stm_source device gets linked to an stm device.
+ *
+ * This grabs device's reference (get_device()) and module reference, both
+ * of which the calling path needs to make sure to drop with stm_put_device().
+ *
+ * Return:	stm device pointer or null if lookup failed.
+ */
+struct stm_device *stm_find_device(const char *buf)
+{
+	struct stm_device *stm;
+	struct device *dev;
+
+	if (!stm_core_up)
+		return NULL;
+
+	dev = class_find_device_by_name(&stm_class, buf);
+	if (!dev)
+		return NULL;
+
+	stm = to_stm_device(dev);
+	if (!try_module_get(stm->owner)) {
+		/* matches class_find_device() above */
+		put_device(dev);
+		return NULL;
+	}
+
+	return stm;
+}
+
+/**
+ * stm_put_device() - drop references on the stm device
+ * @stm:	stm device, previously acquired by stm_find_device()
+ *
+ * This drops the module reference and device reference taken by
+ * stm_find_device() or stm_char_open().
+ */
+void stm_put_device(struct stm_device *stm)
+{
+	module_put(stm->owner);
+	put_device(&stm->dev);
+}
+
+/*
+ * Internally we only care about software-writable masters here, that is the
+ * ones in the range [stm_data->sw_start..stm_data..sw_end], however we need
+ * original master numbers to be visible externally, since they are the ones
+ * that will appear in the STP stream. Thus, the internal bookkeeping uses
+ * $master - stm_data->sw_start to reference master descriptors and such.
+ */
+
+#define __stm_master(_s, _m)				\
+	((_s)->masters[(_m) - (_s)->data->sw_start])
+
+static inline struct stp_master *
+stm_master(struct stm_device *stm, unsigned int idx)
+{
+	if (idx < stm->data->sw_start || idx > stm->data->sw_end)
+		return NULL;
+
+	return __stm_master(stm, idx);
+}
+
+static int stp_master_alloc(struct stm_device *stm, unsigned int idx)
+{
+	struct stp_master *master;
+
+	master = kzalloc(struct_size(master, chan_map,
+				     BITS_TO_LONGS(stm->data->sw_nchannels)),
+			 GFP_ATOMIC);
+	if (!master)
+		return -ENOMEM;
+
+	master->nr_free = stm->data->sw_nchannels;
+	__stm_master(stm, idx) = master;
+
+	return 0;
+}
+
+static void stp_master_free(struct stm_device *stm, unsigned int idx)
+{
+	struct stp_master *master = stm_master(stm, idx);
+
+	if (!master)
+		return;
+
+	__stm_master(stm, idx) = NULL;
+	kfree(master);
+}
+
+static void stm_output_claim(struct stm_device *stm, struct stm_output *output)
+{
+	struct stp_master *master = stm_master(stm, output->master);
+
+	lockdep_assert_held(&stm->mc_lock);
+	lockdep_assert_held(&output->lock);
+
+	if (WARN_ON_ONCE(master->nr_free < output->nr_chans))
+		return;
+
+	bitmap_allocate_region(&master->chan_map[0], output->channel,
+			       ilog2(output->nr_chans));
+
+	master->nr_free -= output->nr_chans;
+}
+
+static void
+stm_output_disclaim(struct stm_device *stm, struct stm_output *output)
+{
+	struct stp_master *master = stm_master(stm, output->master);
+
+	lockdep_assert_held(&stm->mc_lock);
+	lockdep_assert_held(&output->lock);
+
+	bitmap_release_region(&master->chan_map[0], output->channel,
+			      ilog2(output->nr_chans));
+
+	master->nr_free += output->nr_chans;
+	output->nr_chans = 0;
+}
+
+/*
+ * This is like bitmap_find_free_region(), except it can ignore @start bits
+ * at the beginning.
+ */
+static int find_free_channels(unsigned long *bitmap, unsigned int start,
+			      unsigned int end, unsigned int width)
+{
+	unsigned int pos;
+	int i;
+
+	for (pos = start; pos < end + 1; pos = ALIGN(pos, width)) {
+		pos = find_next_zero_bit(bitmap, end + 1, pos);
+		if (pos + width > end + 1)
+			break;
+
+		if (pos & (width - 1))
+			continue;
+
+		for (i = 1; i < width && !test_bit(pos + i, bitmap); i++)
+			;
+		if (i == width)
+			return pos;
+
+		/* step over [pos..pos+i) to continue search */
+		pos += i;
+	}
+
+	return -1;
+}
+
+static int
+stm_find_master_chan(struct stm_device *stm, unsigned int width,
+		     unsigned int *mstart, unsigned int mend,
+		     unsigned int *cstart, unsigned int cend)
+{
+	struct stp_master *master;
+	unsigned int midx;
+	int pos, err;
+
+	for (midx = *mstart; midx <= mend; midx++) {
+		if (!stm_master(stm, midx)) {
+			err = stp_master_alloc(stm, midx);
+			if (err)
+				return err;
+		}
+
+		master = stm_master(stm, midx);
+
+		if (!master->nr_free)
+			continue;
+
+		pos = find_free_channels(master->chan_map, *cstart, cend,
+					 width);
+		if (pos < 0)
+			continue;
+
+		*mstart = midx;
+		*cstart = pos;
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int stm_output_assign(struct stm_device *stm, unsigned int width,
+			     struct stp_policy_node *policy_node,
+			     struct stm_output *output)
+{
+	unsigned int midx, cidx, mend, cend;
+	int ret = -EINVAL;
+
+	if (width > stm->data->sw_nchannels)
+		return -EINVAL;
+
+	/* We no longer accept policy_node==NULL here */
+	if (WARN_ON_ONCE(!policy_node))
+		return -EINVAL;
+
+	/*
+	 * Also, the caller holds reference to policy_node, so it won't
+	 * disappear on us.
+	 */
+	stp_policy_node_get_ranges(policy_node, &midx, &mend, &cidx, &cend);
+
+	spin_lock(&stm->mc_lock);
+	spin_lock(&output->lock);
+	/* output is already assigned -- shouldn't happen */
+	if (WARN_ON_ONCE(output->nr_chans))
+		goto unlock;
+
+	ret = stm_find_master_chan(stm, width, &midx, mend, &cidx, cend);
+	if (ret < 0)
+		goto unlock;
+
+	output->master = midx;
+	output->channel = cidx;
+	output->nr_chans = width;
+	if (stm->pdrv->output_open) {
+		void *priv = stp_policy_node_priv(policy_node);
+
+		if (WARN_ON_ONCE(!priv))
+			goto unlock;
+
+		/* configfs subsys mutex is held by the caller */
+		ret = stm->pdrv->output_open(priv, output);
+		if (ret)
+			goto unlock;
+	}
+
+	stm_output_claim(stm, output);
+	dev_dbg(&stm->dev, "assigned %u:%u (+%u)\n", midx, cidx, width);
+
+	ret = 0;
+unlock:
+	if (ret)
+		output->nr_chans = 0;
+
+	spin_unlock(&output->lock);
+	spin_unlock(&stm->mc_lock);
+
+	return ret;
+}
+
+static void stm_output_free(struct stm_device *stm, struct stm_output *output)
+{
+	spin_lock(&stm->mc_lock);
+	spin_lock(&output->lock);
+	if (output->nr_chans)
+		stm_output_disclaim(stm, output);
+	if (stm->pdrv && stm->pdrv->output_close)
+		stm->pdrv->output_close(output);
+	spin_unlock(&output->lock);
+	spin_unlock(&stm->mc_lock);
+}
+
+static void stm_output_init(struct stm_output *output)
+{
+	spin_lock_init(&output->lock);
+}
+
+static int major_match(struct device *dev, const void *data)
+{
+	unsigned int major = *(unsigned int *)data;
+
+	return MAJOR(dev->devt) == major;
+}
+
+/*
+ * Framing protocol management
+ * Modules can implement STM protocol drivers and (un-)register them
+ * with the STM class framework.
+ */
+static struct list_head stm_pdrv_head;
+static struct mutex stm_pdrv_mutex;
+
+struct stm_pdrv_entry {
+	struct list_head			entry;
+	const struct stm_protocol_driver	*pdrv;
+	const struct config_item_type		*node_type;
+};
+
+static const struct stm_pdrv_entry *
+__stm_lookup_protocol(const char *name)
+{
+	struct stm_pdrv_entry *pe;
+
+	/*
+	 * If no name is given (NULL or ""), fall back to "p_basic".
+	 */
+	if (!name || !*name)
+		name = "p_basic";
+
+	list_for_each_entry(pe, &stm_pdrv_head, entry) {
+		if (!strcmp(name, pe->pdrv->name))
+			return pe;
+	}
+
+	return NULL;
+}
+
+int stm_register_protocol(const struct stm_protocol_driver *pdrv)
+{
+	struct stm_pdrv_entry *pe = NULL;
+	int ret = -ENOMEM;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	if (__stm_lookup_protocol(pdrv->name)) {
+		ret = -EEXIST;
+		goto unlock;
+	}
+
+	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+	if (!pe)
+		goto unlock;
+
+	if (pdrv->policy_attr) {
+		pe->node_type = get_policy_node_type(pdrv->policy_attr);
+		if (!pe->node_type)
+			goto unlock;
+	}
+
+	list_add_tail(&pe->entry, &stm_pdrv_head);
+	pe->pdrv = pdrv;
+
+	ret = 0;
+unlock:
+	mutex_unlock(&stm_pdrv_mutex);
+
+	if (ret)
+		kfree(pe);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(stm_register_protocol);
+
+void stm_unregister_protocol(const struct stm_protocol_driver *pdrv)
+{
+	struct stm_pdrv_entry *pe, *iter;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	list_for_each_entry_safe(pe, iter, &stm_pdrv_head, entry) {
+		if (pe->pdrv == pdrv) {
+			list_del(&pe->entry);
+
+			if (pe->node_type) {
+				kfree(pe->node_type->ct_attrs);
+				kfree(pe->node_type);
+			}
+			kfree(pe);
+			break;
+		}
+	}
+
+	mutex_unlock(&stm_pdrv_mutex);
+}
+EXPORT_SYMBOL_GPL(stm_unregister_protocol);
+
+static bool stm_get_protocol(const struct stm_protocol_driver *pdrv)
+{
+	return try_module_get(pdrv->owner);
+}
+
+void stm_put_protocol(const struct stm_protocol_driver *pdrv)
+{
+	module_put(pdrv->owner);
+}
+
+int stm_lookup_protocol(const char *name,
+			const struct stm_protocol_driver **pdrv,
+			const struct config_item_type **node_type)
+{
+	const struct stm_pdrv_entry *pe;
+
+	mutex_lock(&stm_pdrv_mutex);
+
+	pe = __stm_lookup_protocol(name);
+	if (pe && pe->pdrv && stm_get_protocol(pe->pdrv)) {
+		*pdrv = pe->pdrv;
+		*node_type = pe->node_type;
+	}
+
+	mutex_unlock(&stm_pdrv_mutex);
+
+	return pe ? 0 : -ENOENT;
+}
+
+static int stm_char_open(struct inode *inode, struct file *file)
+{
+	struct stm_file *stmf;
+	struct device *dev;
+	unsigned int major = imajor(inode);
+	int err = -ENOMEM;
+
+	dev = class_find_device(&stm_class, NULL, &major, major_match);
+	if (!dev)
+		return -ENODEV;
+
+	stmf = kzalloc(sizeof(*stmf), GFP_KERNEL);
+	if (!stmf)
+		goto err_put_device;
+
+	err = -ENODEV;
+	stm_output_init(&stmf->output);
+	stmf->stm = to_stm_device(dev);
+
+	if (!try_module_get(stmf->stm->owner))
+		goto err_free;
+
+	file->private_data = stmf;
+
+	return nonseekable_open(inode, file);
+
+err_free:
+	kfree(stmf);
+err_put_device:
+	/* matches class_find_device() above */
+	put_device(dev);
+
+	return err;
+}
+
+static int stm_char_release(struct inode *inode, struct file *file)
+{
+	struct stm_file *stmf = file->private_data;
+	struct stm_device *stm = stmf->stm;
+
+	if (stm->data->unlink)
+		stm->data->unlink(stm->data, stmf->output.master,
+				  stmf->output.channel);
+
+	stm_output_free(stm, &stmf->output);
+
+	/*
+	 * matches the stm_char_open()'s
+	 * class_find_device() + try_module_get()
+	 */
+	stm_put_device(stm);
+	kfree(stmf);
+
+	return 0;
+}
+
+static int
+stm_assign_first_policy(struct stm_device *stm, struct stm_output *output,
+			char **ids, unsigned int width)
+{
+	struct stp_policy_node *pn;
+	int err, n;
+
+	/*
+	 * On success, stp_policy_node_lookup() will return holding the
+	 * configfs subsystem mutex, which is then released in
+	 * stp_policy_node_put(). This allows the pdrv->output_open() in
+	 * stm_output_assign() to serialize against the attribute accessors.
+	 */
+	for (n = 0, pn = NULL; ids[n] && !pn; n++)
+		pn = stp_policy_node_lookup(stm, ids[n]);
+
+	if (!pn)
+		return -EINVAL;
+
+	err = stm_output_assign(stm, width, pn, output);
+
+	stp_policy_node_put(pn);
+
+	return err;
+}
+
+/**
+ * stm_data_write() - send the given payload as data packets
+ * @data:	stm driver's data
+ * @m:		STP master
+ * @c:		STP channel
+ * @ts_first:	timestamp the first packet
+ * @buf:	data payload buffer
+ * @count:	data payload size
+ */
+ssize_t notrace stm_data_write(struct stm_data *data, unsigned int m,
+			       unsigned int c, bool ts_first, const void *buf,
+			       size_t count)
+{
+	unsigned int flags = ts_first ? STP_PACKET_TIMESTAMPED : 0;
+	ssize_t sz;
+	size_t pos;
+
+	for (pos = 0, sz = 0; pos < count; pos += sz) {
+		sz = min_t(unsigned int, count - pos, 8);
+		sz = data->packet(data, m, c, STP_PACKET_DATA, flags, sz,
+				  &((u8 *)buf)[pos]);
+		if (sz <= 0)
+			break;
+
+		if (ts_first) {
+			flags = 0;
+			ts_first = false;
+		}
+	}
+
+	return sz < 0 ? sz : pos;
+}
+EXPORT_SYMBOL_GPL(stm_data_write);
+
+static ssize_t notrace
+stm_write(struct stm_device *stm, struct stm_output *output,
+	  unsigned int chan, const char *buf, size_t count)
+{
+	int err;
+
+	/* stm->pdrv is serialized against policy_mutex */
+	if (!stm->pdrv)
+		return -ENODEV;
+
+	err = stm->pdrv->write(stm->data, output, chan, buf, count);
+	if (err < 0)
+		return err;
+
+	return err;
+}
+
+static ssize_t stm_char_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct stm_file *stmf = file->private_data;
+	struct stm_device *stm = stmf->stm;
+	char *kbuf;
+	int err;
+
+	if (count + 1 > PAGE_SIZE)
+		count = PAGE_SIZE - 1;
+
+	/*
+	 * If no m/c have been assigned to this writer up to this
+	 * point, try to use the task name and "default" policy entries.
+	 */
+	if (!stmf->output.nr_chans) {
+		char comm[sizeof(current->comm)];
+		char *ids[] = { comm, "default", NULL };
+
+		get_task_comm(comm, current);
+
+		err = stm_assign_first_policy(stmf->stm, &stmf->output, ids, 1);
+		/*
+		 * EBUSY means that somebody else just assigned this
+		 * output, which is just fine for write()
+		 */
+		if (err)
+			return err;
+	}
+
+	kbuf = kmalloc(count + 1, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	err = copy_from_user(kbuf, buf, count);
+	if (err) {
+		kfree(kbuf);
+		return -EFAULT;
+	}
+
+	pm_runtime_get_sync(&stm->dev);
+
+	count = stm_write(stm, &stmf->output, 0, kbuf, count);
+
+	pm_runtime_mark_last_busy(&stm->dev);
+	pm_runtime_put_autosuspend(&stm->dev);
+	kfree(kbuf);
+
+	return count;
+}
+
+static void stm_mmap_open(struct vm_area_struct *vma)
+{
+	struct stm_file *stmf = vma->vm_file->private_data;
+	struct stm_device *stm = stmf->stm;
+
+	pm_runtime_get(&stm->dev);
+}
+
+static void stm_mmap_close(struct vm_area_struct *vma)
+{
+	struct stm_file *stmf = vma->vm_file->private_data;
+	struct stm_device *stm = stmf->stm;
+
+	pm_runtime_mark_last_busy(&stm->dev);
+	pm_runtime_put_autosuspend(&stm->dev);
+}
+
+static const struct vm_operations_struct stm_mmap_vmops = {
+	.open	= stm_mmap_open,
+	.close	= stm_mmap_close,
+};
+
+static int stm_char_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct stm_file *stmf = file->private_data;
+	struct stm_device *stm = stmf->stm;
+	unsigned long size, phys;
+
+	if (!stm->data->mmio_addr)
+		return -EOPNOTSUPP;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	size = vma->vm_end - vma->vm_start;
+
+	if (stmf->output.nr_chans * stm->data->sw_mmiosz != size)
+		return -EINVAL;
+
+	phys = stm->data->mmio_addr(stm->data, stmf->output.master,
+				    stmf->output.channel,
+				    stmf->output.nr_chans);
+
+	if (!phys)
+		return -EINVAL;
+
+	pm_runtime_get_sync(&stm->dev);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
+	vma->vm_ops = &stm_mmap_vmops;
+	vm_iomap_memory(vma, phys, size);
+
+	return 0;
+}
+
+static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg)
+{
+	struct stm_device *stm = stmf->stm;
+	struct stp_policy_id *id;
+	char *ids[] = { NULL, NULL };
+	int ret = -EINVAL, wlimit = 1;
+	u32 size;
+
+	if (stmf->output.nr_chans)
+		return -EBUSY;
+
+	if (copy_from_user(&size, arg, sizeof(size)))
+		return -EFAULT;
+
+	if (size < sizeof(*id) || size >= PATH_MAX + sizeof(*id))
+		return -EINVAL;
+
+	/*
+	 * size + 1 to make sure the .id string at the bottom is terminated,
+	 * which is also why memdup_user() is not useful here
+	 */
+	id = kzalloc(size + 1, GFP_KERNEL);
+	if (!id)
+		return -ENOMEM;
+
+	if (copy_from_user(id, arg, size)) {
+		ret = -EFAULT;
+		goto err_free;
+	}
+
+	if (id->__reserved_0 || id->__reserved_1)
+		goto err_free;
+
+	if (stm->data->sw_mmiosz)
+		wlimit = PAGE_SIZE / stm->data->sw_mmiosz;
+
+	if (id->width < 1 || id->width > wlimit)
+		goto err_free;
+
+	ids[0] = id->id;
+	ret = stm_assign_first_policy(stmf->stm, &stmf->output, ids,
+				      id->width);
+	if (ret)
+		goto err_free;
+
+	if (stm->data->link)
+		ret = stm->data->link(stm->data, stmf->output.master,
+				      stmf->output.channel);
+
+	if (ret)
+		stm_output_free(stmf->stm, &stmf->output);
+
+err_free:
+	kfree(id);
+
+	return ret;
+}
+
+static int stm_char_policy_get_ioctl(struct stm_file *stmf, void __user *arg)
+{
+	struct stp_policy_id id = {
+		.size		= sizeof(id),
+		.master		= stmf->output.master,
+		.channel	= stmf->output.channel,
+		.width		= stmf->output.nr_chans,
+		.__reserved_0	= 0,
+		.__reserved_1	= 0,
+	};
+
+	return copy_to_user(arg, &id, id.size) ? -EFAULT : 0;
+}
+
+static long
+stm_char_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct stm_file *stmf = file->private_data;
+	struct stm_data *stm_data = stmf->stm->data;
+	int err = -ENOTTY;
+	u64 options;
+
+	switch (cmd) {
+	case STP_POLICY_ID_SET:
+		err = stm_char_policy_set_ioctl(stmf, (void __user *)arg);
+		if (err)
+			return err;
+
+		return stm_char_policy_get_ioctl(stmf, (void __user *)arg);
+
+	case STP_POLICY_ID_GET:
+		return stm_char_policy_get_ioctl(stmf, (void __user *)arg);
+
+	case STP_SET_OPTIONS:
+		if (copy_from_user(&options, (u64 __user *)arg, sizeof(u64)))
+			return -EFAULT;
+
+		if (stm_data->set_options)
+			err = stm_data->set_options(stm_data,
+						    stmf->output.master,
+						    stmf->output.channel,
+						    stmf->output.nr_chans,
+						    options);
+
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static const struct file_operations stm_fops = {
+	.open		= stm_char_open,
+	.release	= stm_char_release,
+	.write		= stm_char_write,
+	.mmap		= stm_char_mmap,
+	.unlocked_ioctl	= stm_char_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+	.llseek		= no_llseek,
+};
+
+static void stm_device_release(struct device *dev)
+{
+	struct stm_device *stm = to_stm_device(dev);
+
+	vfree(stm);
+}
+
+int stm_register_device(struct device *parent, struct stm_data *stm_data,
+			struct module *owner)
+{
+	struct stm_device *stm;
+	unsigned int nmasters;
+	int err = -ENOMEM;
+
+	if (!stm_core_up)
+		return -EPROBE_DEFER;
+
+	if (!stm_data->packet || !stm_data->sw_nchannels)
+		return -EINVAL;
+
+	nmasters = stm_data->sw_end - stm_data->sw_start + 1;
+	stm = vzalloc(sizeof(*stm) + nmasters * sizeof(void *));
+	if (!stm)
+		return -ENOMEM;
+
+	stm->major = register_chrdev(0, stm_data->name, &stm_fops);
+	if (stm->major < 0)
+		goto err_free;
+
+	device_initialize(&stm->dev);
+	stm->dev.devt = MKDEV(stm->major, 0);
+	stm->dev.class = &stm_class;
+	stm->dev.parent = parent;
+	stm->dev.release = stm_device_release;
+
+	mutex_init(&stm->link_mutex);
+	spin_lock_init(&stm->link_lock);
+	INIT_LIST_HEAD(&stm->link_list);
+
+	/* initialize the object before it is accessible via sysfs */
+	spin_lock_init(&stm->mc_lock);
+	mutex_init(&stm->policy_mutex);
+	stm->sw_nmasters = nmasters;
+	stm->owner = owner;
+	stm->data = stm_data;
+	stm_data->stm = stm;
+
+	err = kobject_set_name(&stm->dev.kobj, "%s", stm_data->name);
+	if (err)
+		goto err_device;
+
+	err = device_add(&stm->dev);
+	if (err)
+		goto err_device;
+
+	/*
+	 * Use delayed autosuspend to avoid bouncing back and forth
+	 * on recurring character device writes, with the initial
+	 * delay time of 2 seconds.
+	 */
+	pm_runtime_no_callbacks(&stm->dev);
+	pm_runtime_use_autosuspend(&stm->dev);
+	pm_runtime_set_autosuspend_delay(&stm->dev, 2000);
+	pm_runtime_set_suspended(&stm->dev);
+	pm_runtime_enable(&stm->dev);
+
+	return 0;
+
+err_device:
+	unregister_chrdev(stm->major, stm_data->name);
+
+	/* matches device_initialize() above */
+	put_device(&stm->dev);
+err_free:
+	vfree(stm);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(stm_register_device);
+
+static int __stm_source_link_drop(struct stm_source_device *src,
+				  struct stm_device *stm);
+
+void stm_unregister_device(struct stm_data *stm_data)
+{
+	struct stm_device *stm = stm_data->stm;
+	struct stm_source_device *src, *iter;
+	int i, ret;
+
+	pm_runtime_dont_use_autosuspend(&stm->dev);
+	pm_runtime_disable(&stm->dev);
+
+	mutex_lock(&stm->link_mutex);
+	list_for_each_entry_safe(src, iter, &stm->link_list, link_entry) {
+		ret = __stm_source_link_drop(src, stm);
+		/*
+		 * src <-> stm link must not change under the same
+		 * stm::link_mutex, so complain loudly if it has;
+		 * also in this situation ret!=0 means this src is
+		 * not connected to this stm and it should be otherwise
+		 * safe to proceed with the tear-down of stm.
+		 */
+		WARN_ON_ONCE(ret);
+	}
+	mutex_unlock(&stm->link_mutex);
+
+	synchronize_srcu(&stm_source_srcu);
+
+	unregister_chrdev(stm->major, stm_data->name);
+
+	mutex_lock(&stm->policy_mutex);
+	if (stm->policy)
+		stp_policy_unbind(stm->policy);
+	mutex_unlock(&stm->policy_mutex);
+
+	for (i = stm->data->sw_start; i <= stm->data->sw_end; i++)
+		stp_master_free(stm, i);
+
+	device_unregister(&stm->dev);
+	stm_data->stm = NULL;
+}
+EXPORT_SYMBOL_GPL(stm_unregister_device);
+
+/*
+ * stm::link_list access serialization uses a spinlock and a mutex; holding
+ * either of them guarantees that the list is stable; modification requires
+ * holding both of them.
+ *
+ * Lock ordering is as follows:
+ *   stm::link_mutex
+ *     stm::link_lock
+ *       src::link_lock
+ */
+
+/**
+ * stm_source_link_add() - connect an stm_source device to an stm device
+ * @src:	stm_source device
+ * @stm:	stm device
+ *
+ * This function establishes a link from stm_source to an stm device so that
+ * the former can send out trace data to the latter.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+static int stm_source_link_add(struct stm_source_device *src,
+			       struct stm_device *stm)
+{
+	char *ids[] = { NULL, "default", NULL };
+	int err = -ENOMEM;
+
+	mutex_lock(&stm->link_mutex);
+	spin_lock(&stm->link_lock);
+	spin_lock(&src->link_lock);
+
+	/* src->link is dereferenced under stm_source_srcu but not the list */
+	rcu_assign_pointer(src->link, stm);
+	list_add_tail(&src->link_entry, &stm->link_list);
+
+	spin_unlock(&src->link_lock);
+	spin_unlock(&stm->link_lock);
+	mutex_unlock(&stm->link_mutex);
+
+	ids[0] = kstrdup(src->data->name, GFP_KERNEL);
+	if (!ids[0])
+		goto fail_detach;
+
+	err = stm_assign_first_policy(stm, &src->output, ids,
+				      src->data->nr_chans);
+	kfree(ids[0]);
+
+	if (err)
+		goto fail_detach;
+
+	/* this is to notify the STM device that a new link has been made */
+	if (stm->data->link)
+		err = stm->data->link(stm->data, src->output.master,
+				      src->output.channel);
+
+	if (err)
+		goto fail_free_output;
+
+	/* this is to let the source carry out all necessary preparations */
+	if (src->data->link)
+		src->data->link(src->data);
+
+	return 0;
+
+fail_free_output:
+	stm_output_free(stm, &src->output);
+
+fail_detach:
+	mutex_lock(&stm->link_mutex);
+	spin_lock(&stm->link_lock);
+	spin_lock(&src->link_lock);
+
+	rcu_assign_pointer(src->link, NULL);
+	list_del_init(&src->link_entry);
+
+	spin_unlock(&src->link_lock);
+	spin_unlock(&stm->link_lock);
+	mutex_unlock(&stm->link_mutex);
+
+	return err;
+}
+
+/**
+ * __stm_source_link_drop() - detach stm_source from an stm device
+ * @src:	stm_source device
+ * @stm:	stm device
+ *
+ * If @stm is @src::link, disconnect them from one another and put the
+ * reference on the @stm device.
+ *
+ * Caller must hold stm::link_mutex.
+ */
+static int __stm_source_link_drop(struct stm_source_device *src,
+				  struct stm_device *stm)
+{
+	struct stm_device *link;
+	int ret = 0;
+
+	lockdep_assert_held(&stm->link_mutex);
+
+	/* for stm::link_list modification, we hold both mutex and spinlock */
+	spin_lock(&stm->link_lock);
+	spin_lock(&src->link_lock);
+	link = srcu_dereference_check(src->link, &stm_source_srcu, 1);
+
+	/*
+	 * The linked device may have changed since we last looked, because
+	 * we weren't holding the src::link_lock back then; if this is the
+	 * case, tell the caller to retry.
+	 */
+	if (link != stm) {
+		ret = -EAGAIN;
+		goto unlock;
+	}
+
+	stm_output_free(link, &src->output);
+	list_del_init(&src->link_entry);
+	pm_runtime_mark_last_busy(&link->dev);
+	pm_runtime_put_autosuspend(&link->dev);
+	/* matches stm_find_device() from stm_source_link_store() */
+	stm_put_device(link);
+	rcu_assign_pointer(src->link, NULL);
+
+unlock:
+	spin_unlock(&src->link_lock);
+	spin_unlock(&stm->link_lock);
+
+	/*
+	 * Call the unlink callbacks for both source and stm, when we know
+	 * that we have actually performed the unlinking.
+	 */
+	if (!ret) {
+		if (src->data->unlink)
+			src->data->unlink(src->data);
+
+		if (stm->data->unlink)
+			stm->data->unlink(stm->data, src->output.master,
+					  src->output.channel);
+	}
+
+	return ret;
+}
+
+/**
+ * stm_source_link_drop() - detach stm_source from its stm device
+ * @src:	stm_source device
+ *
+ * Unlinking means disconnecting from source's STM device; after this
+ * writes will be unsuccessful until it is linked to a new STM device.
+ *
+ * This will happen on "stm_source_link" sysfs attribute write to undo
+ * the existing link (if any), or on linked STM device's de-registration.
+ */
+static void stm_source_link_drop(struct stm_source_device *src)
+{
+	struct stm_device *stm;
+	int idx, ret;
+
+retry:
+	idx = srcu_read_lock(&stm_source_srcu);
+	/*
+	 * The stm device will be valid for the duration of this
+	 * read section, but the link may change before we grab
+	 * the src::link_lock in __stm_source_link_drop().
+	 */
+	stm = srcu_dereference(src->link, &stm_source_srcu);
+
+	ret = 0;
+	if (stm) {
+		mutex_lock(&stm->link_mutex);
+		ret = __stm_source_link_drop(src, stm);
+		mutex_unlock(&stm->link_mutex);
+	}
+
+	srcu_read_unlock(&stm_source_srcu, idx);
+
+	/* if it did change, retry */
+	if (ret == -EAGAIN)
+		goto retry;
+}
+
+static ssize_t stm_source_link_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct stm_source_device *src = to_stm_source_device(dev);
+	struct stm_device *stm;
+	int idx, ret;
+
+	idx = srcu_read_lock(&stm_source_srcu);
+	stm = srcu_dereference(src->link, &stm_source_srcu);
+	ret = sprintf(buf, "%s\n",
+		      stm ? dev_name(&stm->dev) : "<none>");
+	srcu_read_unlock(&stm_source_srcu, idx);
+
+	return ret;
+}
+
+static ssize_t stm_source_link_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct stm_source_device *src = to_stm_source_device(dev);
+	struct stm_device *link;
+	int err;
+
+	stm_source_link_drop(src);
+
+	link = stm_find_device(buf);
+	if (!link)
+		return -EINVAL;
+
+	pm_runtime_get(&link->dev);
+
+	err = stm_source_link_add(src, link);
+	if (err) {
+		pm_runtime_put_autosuspend(&link->dev);
+		/* matches the stm_find_device() above */
+		stm_put_device(link);
+	}
+
+	return err ? : count;
+}
+
+static DEVICE_ATTR_RW(stm_source_link);
+
+static struct attribute *stm_source_attrs[] = {
+	&dev_attr_stm_source_link.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(stm_source);
+
+static struct class stm_source_class = {
+	.name		= "stm_source",
+	.dev_groups	= stm_source_groups,
+};
+
+static void stm_source_device_release(struct device *dev)
+{
+	struct stm_source_device *src = to_stm_source_device(dev);
+
+	kfree(src);
+}
+
+/**
+ * stm_source_register_device() - register an stm_source device
+ * @parent:	parent device
+ * @data:	device description structure
+ *
+ * This will create a device of stm_source class that can write
+ * data to an stm device once linked.
+ *
+ * Return:	0 on success, -errno otherwise.
+ */
+int stm_source_register_device(struct device *parent,
+			       struct stm_source_data *data)
+{
+	struct stm_source_device *src;
+	int err;
+
+	if (!stm_core_up)
+		return -EPROBE_DEFER;
+
+	src = kzalloc(sizeof(*src), GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+
+	device_initialize(&src->dev);
+	src->dev.class = &stm_source_class;
+	src->dev.parent = parent;
+	src->dev.release = stm_source_device_release;
+
+	err = kobject_set_name(&src->dev.kobj, "%s", data->name);
+	if (err)
+		goto err;
+
+	pm_runtime_no_callbacks(&src->dev);
+	pm_runtime_forbid(&src->dev);
+
+	err = device_add(&src->dev);
+	if (err)
+		goto err;
+
+	stm_output_init(&src->output);
+	spin_lock_init(&src->link_lock);
+	INIT_LIST_HEAD(&src->link_entry);
+	src->data = data;
+	data->src = src;
+
+	return 0;
+
+err:
+	put_device(&src->dev);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(stm_source_register_device);
+
+/**
+ * stm_source_unregister_device() - unregister an stm_source device
+ * @data:	device description that was used to register the device
+ *
+ * This will remove a previously created stm_source device from the system.
+ */
+void stm_source_unregister_device(struct stm_source_data *data)
+{
+	struct stm_source_device *src = data->src;
+
+	stm_source_link_drop(src);
+
+	device_unregister(&src->dev);
+}
+EXPORT_SYMBOL_GPL(stm_source_unregister_device);
+
+int notrace stm_source_write(struct stm_source_data *data,
+			     unsigned int chan,
+			     const char *buf, size_t count)
+{
+	struct stm_source_device *src = data->src;
+	struct stm_device *stm;
+	int idx;
+
+	if (!src->output.nr_chans)
+		return -ENODEV;
+
+	if (chan >= src->output.nr_chans)
+		return -EINVAL;
+
+	idx = srcu_read_lock(&stm_source_srcu);
+
+	stm = srcu_dereference(src->link, &stm_source_srcu);
+	if (stm)
+		count = stm_write(stm, &src->output, chan, buf, count);
+	else
+		count = -ENODEV;
+
+	srcu_read_unlock(&stm_source_srcu, idx);
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(stm_source_write);
+
+static int __init stm_core_init(void)
+{
+	int err;
+
+	err = class_register(&stm_class);
+	if (err)
+		return err;
+
+	err = class_register(&stm_source_class);
+	if (err)
+		goto err_stm;
+
+	err = stp_configfs_init();
+	if (err)
+		goto err_src;
+
+	init_srcu_struct(&stm_source_srcu);
+	INIT_LIST_HEAD(&stm_pdrv_head);
+	mutex_init(&stm_pdrv_mutex);
+
+	/*
+	 * So as to not confuse existing users with a requirement
+	 * to load yet another module, do it here.
+	 */
+	if (IS_ENABLED(CONFIG_STM_PROTO_BASIC))
+		(void)request_module_nowait("stm_p_basic");
+	stm_core_up++;
+
+	return 0;
+
+err_src:
+	class_unregister(&stm_source_class);
+err_stm:
+	class_unregister(&stm_class);
+
+	return err;
+}
+
+module_init(stm_core_init);
+
+static void __exit stm_core_exit(void)
+{
+	cleanup_srcu_struct(&stm_source_srcu);
+	class_unregister(&stm_source_class);
+	class_unregister(&stm_class);
+	stp_configfs_exit();
+}
+
+module_exit(stm_core_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("System Trace Module device class");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/dummy_stm.c b/drivers/hwtracing/stm/dummy_stm.c
new file mode 100644
index 0000000000..38528ffdc0
--- /dev/null
+++ b/drivers/hwtracing/stm/dummy_stm.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A dummy STM device for stm/stm_source class testing.
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * STM class implements generic infrastructure for  System Trace Module devices
+ * as defined in MIPI STPv2 specification.
+ */
+
+#undef DEBUG
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+#include <uapi/linux/stm.h>
+
+static ssize_t notrace
+dummy_stm_packet(struct stm_data *stm_data, unsigned int master,
+		 unsigned int channel, unsigned int packet, unsigned int flags,
+		 unsigned int size, const unsigned char *payload)
+{
+#ifdef DEBUG
+	u64 pl = 0;
+
+	if (payload)
+		pl = *(u64 *)payload;
+
+	if (size < 8)
+		pl &= (1ull << (size * 8)) - 1;
+	trace_printk("[%u:%u] [pkt: %x/%x] (%llx)\n", master, channel,
+		     packet, size, pl);
+#endif
+	return size;
+}
+
+#define DUMMY_STM_MAX 32
+
+static struct stm_data dummy_stm[DUMMY_STM_MAX];
+
+static int nr_dummies = 4;
+
+module_param(nr_dummies, int, 0400);
+
+static unsigned int fail_mode;
+
+module_param(fail_mode, int, 0600);
+
+static unsigned int master_min;
+
+module_param(master_min, int, 0400);
+
+static unsigned int master_max = STP_MASTER_MAX;
+
+module_param(master_max, int, 0400);
+
+static unsigned int nr_channels = STP_CHANNEL_MAX;
+
+module_param(nr_channels, int, 0400);
+
+static int dummy_stm_link(struct stm_data *data, unsigned int master,
+			  unsigned int channel)
+{
+	if (fail_mode && (channel & fail_mode))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int dummy_stm_init(void)
+{
+	int i, ret = -ENOMEM;
+
+	if (nr_dummies < 0 || nr_dummies > DUMMY_STM_MAX)
+		return -EINVAL;
+
+	if (master_min > master_max ||
+	    master_max > STP_MASTER_MAX ||
+	    nr_channels > STP_CHANNEL_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < nr_dummies; i++) {
+		dummy_stm[i].name = kasprintf(GFP_KERNEL, "dummy_stm.%d", i);
+		if (!dummy_stm[i].name)
+			goto fail_unregister;
+
+		dummy_stm[i].sw_start		= master_min;
+		dummy_stm[i].sw_end		= master_max;
+		dummy_stm[i].sw_nchannels	= nr_channels;
+		dummy_stm[i].packet		= dummy_stm_packet;
+		dummy_stm[i].link		= dummy_stm_link;
+
+		ret = stm_register_device(NULL, &dummy_stm[i], THIS_MODULE);
+		if (ret)
+			goto fail_free;
+	}
+
+	return 0;
+
+fail_unregister:
+	for (i--; i >= 0; i--) {
+		stm_unregister_device(&dummy_stm[i]);
+fail_free:
+		kfree(dummy_stm[i].name);
+	}
+
+	return ret;
+
+}
+
+static void dummy_stm_exit(void)
+{
+	int i;
+
+	for (i = 0; i < nr_dummies; i++) {
+		stm_unregister_device(&dummy_stm[i]);
+		kfree(dummy_stm[i].name);
+	}
+}
+
+module_init(dummy_stm_init);
+module_exit(dummy_stm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("dummy_stm device");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/ftrace.c b/drivers/hwtracing/stm/ftrace.c
new file mode 100644
index 0000000000..3bb606dfa6
--- /dev/null
+++ b/drivers/hwtracing/stm/ftrace.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple kernel driver to link kernel Ftrace and an STM device
+ * Copyright (c) 2016, Linaro Ltd.
+ *
+ * STM Ftrace will be registered as a trace_export.
+ */
+
+#include <linux/module.h>
+#include <linux/stm.h>
+#include <linux/trace.h>
+
+#define STM_FTRACE_NR_CHANNELS 1
+#define STM_FTRACE_CHAN 0
+
+static int stm_ftrace_link(struct stm_source_data *data);
+static void stm_ftrace_unlink(struct stm_source_data *data);
+
+static struct stm_ftrace {
+	struct stm_source_data	data;
+	struct trace_export	ftrace;
+} stm_ftrace = {
+	.data	= {
+		.name		= "ftrace",
+		.nr_chans	= STM_FTRACE_NR_CHANNELS,
+		.link		= stm_ftrace_link,
+		.unlink		= stm_ftrace_unlink,
+	},
+};
+
+/**
+ * stm_ftrace_write() - write data to STM via 'stm_ftrace' source
+ * @buf:	buffer containing the data packet
+ * @len:	length of the data packet
+ */
+static void notrace
+stm_ftrace_write(struct trace_export *export, const void *buf, unsigned int len)
+{
+	struct stm_ftrace *stm = container_of(export, struct stm_ftrace, ftrace);
+	/* This is called from trace system with preemption disabled */
+	unsigned int cpu = smp_processor_id();
+
+	stm_source_write(&stm->data, STM_FTRACE_CHAN + cpu, buf, len);
+}
+
+static int stm_ftrace_link(struct stm_source_data *data)
+{
+	struct stm_ftrace *sf = container_of(data, struct stm_ftrace, data);
+
+	sf->ftrace.write = stm_ftrace_write;
+	sf->ftrace.flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT
+			| TRACE_EXPORT_MARKER;
+
+	return register_ftrace_export(&sf->ftrace);
+}
+
+static void stm_ftrace_unlink(struct stm_source_data *data)
+{
+	struct stm_ftrace *sf = container_of(data, struct stm_ftrace, data);
+
+	unregister_ftrace_export(&sf->ftrace);
+}
+
+static int __init stm_ftrace_init(void)
+{
+	int ret;
+
+	stm_ftrace.data.nr_chans = roundup_pow_of_two(num_possible_cpus());
+	ret = stm_source_register_device(NULL, &stm_ftrace.data);
+	if (ret)
+		pr_err("Failed to register stm_source - ftrace.\n");
+
+	return ret;
+}
+
+static void __exit stm_ftrace_exit(void)
+{
+	stm_source_unregister_device(&stm_ftrace.data);
+}
+
+module_init(stm_ftrace_init);
+module_exit(stm_ftrace_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("stm_ftrace driver");
+MODULE_AUTHOR("Chunyan Zhang <zhang.chunyan@linaro.org>");
diff --git a/drivers/hwtracing/stm/heartbeat.c b/drivers/hwtracing/stm/heartbeat.c
new file mode 100644
index 0000000000..81d7b21d31
--- /dev/null
+++ b/drivers/hwtracing/stm/heartbeat.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple heartbeat STM source driver
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * Heartbeat STM source will send repetitive messages over STM devices to a
+ * trace host.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+
+#define STM_HEARTBEAT_MAX	32
+
+static int nr_devs = 4;
+static int interval_ms = 10;
+
+module_param(nr_devs, int, 0400);
+module_param(interval_ms, int, 0600);
+
+static struct stm_heartbeat {
+	struct stm_source_data	data;
+	struct hrtimer		hrtimer;
+	unsigned int		active;
+} stm_heartbeat[STM_HEARTBEAT_MAX];
+
+static const char str[] = "heartbeat stm source driver is here to serve you";
+
+static enum hrtimer_restart stm_heartbeat_hrtimer_handler(struct hrtimer *hr)
+{
+	struct stm_heartbeat *heartbeat = container_of(hr, struct stm_heartbeat,
+						       hrtimer);
+
+	stm_source_write(&heartbeat->data, 0, str, sizeof str);
+	if (heartbeat->active)
+		hrtimer_forward_now(hr, ms_to_ktime(interval_ms));
+
+	return heartbeat->active ? HRTIMER_RESTART : HRTIMER_NORESTART;
+}
+
+static int stm_heartbeat_link(struct stm_source_data *data)
+{
+	struct stm_heartbeat *heartbeat =
+		container_of(data, struct stm_heartbeat, data);
+
+	heartbeat->active = 1;
+	hrtimer_start(&heartbeat->hrtimer, ms_to_ktime(interval_ms),
+		      HRTIMER_MODE_ABS);
+
+	return 0;
+}
+
+static void stm_heartbeat_unlink(struct stm_source_data *data)
+{
+	struct stm_heartbeat *heartbeat =
+		container_of(data, struct stm_heartbeat, data);
+
+	heartbeat->active = 0;
+	hrtimer_cancel(&heartbeat->hrtimer);
+}
+
+static int stm_heartbeat_init(void)
+{
+	int i, ret;
+
+	if (nr_devs < 0 || nr_devs > STM_HEARTBEAT_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < nr_devs; i++) {
+		stm_heartbeat[i].data.name =
+			kasprintf(GFP_KERNEL, "heartbeat.%d", i);
+		if (!stm_heartbeat[i].data.name) {
+			ret = -ENOMEM;
+			goto fail_unregister;
+		}
+
+		stm_heartbeat[i].data.nr_chans	= 1;
+		stm_heartbeat[i].data.link	= stm_heartbeat_link;
+		stm_heartbeat[i].data.unlink	= stm_heartbeat_unlink;
+		hrtimer_init(&stm_heartbeat[i].hrtimer, CLOCK_MONOTONIC,
+			     HRTIMER_MODE_ABS);
+		stm_heartbeat[i].hrtimer.function =
+			stm_heartbeat_hrtimer_handler;
+
+		ret = stm_source_register_device(NULL, &stm_heartbeat[i].data);
+		if (ret)
+			goto fail_free;
+	}
+
+	return 0;
+
+fail_unregister:
+	for (i--; i >= 0; i--) {
+		stm_source_unregister_device(&stm_heartbeat[i].data);
+fail_free:
+		kfree(stm_heartbeat[i].data.name);
+	}
+
+	return ret;
+}
+
+static void stm_heartbeat_exit(void)
+{
+	int i;
+
+	for (i = 0; i < nr_devs; i++) {
+		stm_source_unregister_device(&stm_heartbeat[i].data);
+		kfree(stm_heartbeat[i].data.name);
+	}
+}
+
+module_init(stm_heartbeat_init);
+module_exit(stm_heartbeat_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("stm_heartbeat driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/p_basic.c b/drivers/hwtracing/stm/p_basic.c
new file mode 100644
index 0000000000..8980a6a5fd
--- /dev/null
+++ b/drivers/hwtracing/stm/p_basic.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic framing protocol for STM devices.
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/stm.h>
+#include "stm.h"
+
+static ssize_t basic_write(struct stm_data *data, struct stm_output *output,
+			   unsigned int chan, const char *buf, size_t count)
+{
+	unsigned int c = output->channel + chan;
+	unsigned int m = output->master;
+	const unsigned char nil = 0;
+	ssize_t sz;
+
+	sz = stm_data_write(data, m, c, true, buf, count);
+	if (sz > 0)
+		data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sz;
+}
+
+static const struct stm_protocol_driver basic_pdrv = {
+	.owner	= THIS_MODULE,
+	.name	= "p_basic",
+	.write	= basic_write,
+};
+
+static int basic_stm_init(void)
+{
+	return stm_register_protocol(&basic_pdrv);
+}
+
+static void basic_stm_exit(void)
+{
+	stm_unregister_protocol(&basic_pdrv);
+}
+
+module_init(basic_stm_init);
+module_exit(basic_stm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Basic STM framing protocol driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/p_sys-t.c b/drivers/hwtracing/stm/p_sys-t.c
new file mode 100644
index 0000000000..8254971c02
--- /dev/null
+++ b/drivers/hwtracing/stm/p_sys-t.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MIPI SyS-T framing protocol for STM devices.
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#include <linux/configfs.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/stm.h>
+#include "stm.h"
+
+enum sys_t_message_type {
+	MIPI_SYST_TYPE_BUILD	= 0,
+	MIPI_SYST_TYPE_SHORT32,
+	MIPI_SYST_TYPE_STRING,
+	MIPI_SYST_TYPE_CATALOG,
+	MIPI_SYST_TYPE_RAW	= 6,
+	MIPI_SYST_TYPE_SHORT64,
+	MIPI_SYST_TYPE_CLOCK,
+};
+
+enum sys_t_message_severity {
+	MIPI_SYST_SEVERITY_MAX	= 0,
+	MIPI_SYST_SEVERITY_FATAL,
+	MIPI_SYST_SEVERITY_ERROR,
+	MIPI_SYST_SEVERITY_WARNING,
+	MIPI_SYST_SEVERITY_INFO,
+	MIPI_SYST_SEVERITY_USER1,
+	MIPI_SYST_SEVERITY_USER2,
+	MIPI_SYST_SEVERITY_DEBUG,
+};
+
+enum sys_t_message_build_subtype {
+	MIPI_SYST_BUILD_ID_COMPACT32 = 0,
+	MIPI_SYST_BUILD_ID_COMPACT64,
+	MIPI_SYST_BUILD_ID_LONG,
+};
+
+enum sys_t_message_clock_subtype {
+	MIPI_SYST_CLOCK_TRANSPORT_SYNC = 1,
+};
+
+enum sys_t_message_string_subtype {
+	MIPI_SYST_STRING_GENERIC	= 1,
+	MIPI_SYST_STRING_FUNCTIONENTER,
+	MIPI_SYST_STRING_FUNCTIONEXIT,
+	MIPI_SYST_STRING_INVALIDPARAM	= 5,
+	MIPI_SYST_STRING_ASSERT		= 7,
+	MIPI_SYST_STRING_PRINTF_32	= 11,
+	MIPI_SYST_STRING_PRINTF_64	= 12,
+};
+
+#define MIPI_SYST_TYPE(t)		((u32)(MIPI_SYST_TYPE_ ## t))
+#define MIPI_SYST_SEVERITY(s)		((u32)(MIPI_SYST_SEVERITY_ ## s) << 4)
+#define MIPI_SYST_OPT_LOC		BIT(8)
+#define MIPI_SYST_OPT_LEN		BIT(9)
+#define MIPI_SYST_OPT_CHK		BIT(10)
+#define MIPI_SYST_OPT_TS		BIT(11)
+#define MIPI_SYST_UNIT(u)		((u32)(u) << 12)
+#define MIPI_SYST_ORIGIN(o)		((u32)(o) << 16)
+#define MIPI_SYST_OPT_GUID		BIT(23)
+#define MIPI_SYST_SUBTYPE(s)		((u32)(MIPI_SYST_ ## s) << 24)
+#define MIPI_SYST_UNITLARGE(u)		(MIPI_SYST_UNIT(u & 0xf) | \
+					 MIPI_SYST_ORIGIN(u >> 4))
+#define MIPI_SYST_TYPES(t, s)		(MIPI_SYST_TYPE(t) | \
+					 MIPI_SYST_SUBTYPE(t ## _ ## s))
+
+#define DATA_HEADER	(MIPI_SYST_TYPES(STRING, GENERIC)	| \
+			 MIPI_SYST_SEVERITY(INFO)		| \
+			 MIPI_SYST_OPT_GUID)
+
+#define CLOCK_SYNC_HEADER	(MIPI_SYST_TYPES(CLOCK, TRANSPORT_SYNC)	| \
+				 MIPI_SYST_SEVERITY(MAX))
+
+struct sys_t_policy_node {
+	uuid_t		uuid;
+	bool		do_len;
+	unsigned long	ts_interval;
+	unsigned long	clocksync_interval;
+};
+
+struct sys_t_output {
+	struct sys_t_policy_node	node;
+	unsigned long	ts_jiffies;
+	unsigned long	clocksync_jiffies;
+};
+
+static void sys_t_policy_node_init(void *priv)
+{
+	struct sys_t_policy_node *pn = priv;
+
+	uuid_gen(&pn->uuid);
+}
+
+static int sys_t_output_open(void *priv, struct stm_output *output)
+{
+	struct sys_t_policy_node *pn = priv;
+	struct sys_t_output *opriv;
+
+	opriv = kzalloc(sizeof(*opriv), GFP_ATOMIC);
+	if (!opriv)
+		return -ENOMEM;
+
+	memcpy(&opriv->node, pn, sizeof(opriv->node));
+	output->pdrv_private = opriv;
+
+	return 0;
+}
+
+static void sys_t_output_close(struct stm_output *output)
+{
+	kfree(output->pdrv_private);
+}
+
+static ssize_t sys_t_policy_uuid_show(struct config_item *item,
+				      char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%pU\n", &pn->uuid);
+}
+
+static ssize_t
+sys_t_policy_uuid_store(struct config_item *item, const char *page,
+			size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = uuid_parse(page, &pn->uuid);
+	mutex_unlock(mutexp);
+
+	return ret < 0 ? ret : count;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, uuid);
+
+static ssize_t sys_t_policy_do_len_show(struct config_item *item,
+				      char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%d\n", pn->do_len);
+}
+
+static ssize_t
+sys_t_policy_do_len_store(struct config_item *item, const char *page,
+			size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtobool(page, &pn->do_len);
+	mutex_unlock(mutexp);
+
+	return ret ? ret : count;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, do_len);
+
+static ssize_t sys_t_policy_ts_interval_show(struct config_item *item,
+					     char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%u\n", jiffies_to_msecs(pn->ts_interval));
+}
+
+static ssize_t
+sys_t_policy_ts_interval_store(struct config_item *item, const char *page,
+			       size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	unsigned int ms;
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtouint(page, 10, &ms);
+	mutex_unlock(mutexp);
+
+	if (!ret) {
+		pn->ts_interval = msecs_to_jiffies(ms);
+		return count;
+	}
+
+	return ret;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, ts_interval);
+
+static ssize_t sys_t_policy_clocksync_interval_show(struct config_item *item,
+						    char *page)
+{
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+
+	return sprintf(page, "%u\n", jiffies_to_msecs(pn->clocksync_interval));
+}
+
+static ssize_t
+sys_t_policy_clocksync_interval_store(struct config_item *item,
+				      const char *page, size_t count)
+{
+	struct mutex *mutexp = &item->ci_group->cg_subsys->su_mutex;
+	struct sys_t_policy_node *pn = to_pdrv_policy_node(item);
+	unsigned int ms;
+	int ret;
+
+	mutex_lock(mutexp);
+	ret = kstrtouint(page, 10, &ms);
+	mutex_unlock(mutexp);
+
+	if (!ret) {
+		pn->clocksync_interval = msecs_to_jiffies(ms);
+		return count;
+	}
+
+	return ret;
+}
+
+CONFIGFS_ATTR(sys_t_policy_, clocksync_interval);
+
+static struct configfs_attribute *sys_t_policy_attrs[] = {
+	&sys_t_policy_attr_uuid,
+	&sys_t_policy_attr_do_len,
+	&sys_t_policy_attr_ts_interval,
+	&sys_t_policy_attr_clocksync_interval,
+	NULL,
+};
+
+static inline bool sys_t_need_ts(struct sys_t_output *op)
+{
+	if (op->node.ts_interval &&
+	    time_after(jiffies, op->ts_jiffies + op->node.ts_interval)) {
+		op->ts_jiffies = jiffies;
+
+		return true;
+	}
+
+	return false;
+}
+
+static bool sys_t_need_clock_sync(struct sys_t_output *op)
+{
+	if (op->node.clocksync_interval &&
+	    time_after(jiffies,
+		       op->clocksync_jiffies + op->node.clocksync_interval)) {
+		op->clocksync_jiffies = jiffies;
+
+		return true;
+	}
+
+	return false;
+}
+
+static ssize_t
+sys_t_clock_sync(struct stm_data *data, unsigned int m, unsigned int c)
+{
+	u32 header = CLOCK_SYNC_HEADER;
+	const unsigned char nil = 0;
+	u64 payload[2]; /* Clock value and frequency */
+	ssize_t sz;
+
+	sz = data->packet(data, m, c, STP_PACKET_DATA, STP_PACKET_TIMESTAMPED,
+			  4, (u8 *)&header);
+	if (sz <= 0)
+		return sz;
+
+	payload[0] = ktime_get_real_ns();
+	payload[1] = NSEC_PER_SEC;
+	sz = stm_data_write(data, m, c, false, &payload, sizeof(payload));
+	if (sz <= 0)
+		return sz;
+
+	data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sizeof(header) + sizeof(payload);
+}
+
+static ssize_t sys_t_write(struct stm_data *data, struct stm_output *output,
+			   unsigned int chan, const char *buf, size_t count)
+{
+	struct sys_t_output *op = output->pdrv_private;
+	unsigned int c = output->channel + chan;
+	unsigned int m = output->master;
+	const unsigned char nil = 0;
+	u32 header = DATA_HEADER;
+	u8 uuid[UUID_SIZE];
+	ssize_t sz;
+
+	/* We require an existing policy node to proceed */
+	if (!op)
+		return -EINVAL;
+
+	if (sys_t_need_clock_sync(op)) {
+		sz = sys_t_clock_sync(data, m, c);
+		if (sz <= 0)
+			return sz;
+	}
+
+	if (op->node.do_len)
+		header |= MIPI_SYST_OPT_LEN;
+	if (sys_t_need_ts(op))
+		header |= MIPI_SYST_OPT_TS;
+
+	/*
+	 * STP framing rules for SyS-T frames:
+	 *   * the first packet of the SyS-T frame is timestamped;
+	 *   * the last packet is a FLAG.
+	 */
+	/* Message layout: HEADER / GUID / [LENGTH /][TIMESTAMP /] DATA */
+	/* HEADER */
+	sz = data->packet(data, m, c, STP_PACKET_DATA, STP_PACKET_TIMESTAMPED,
+			  4, (u8 *)&header);
+	if (sz <= 0)
+		return sz;
+
+	/* GUID */
+	export_uuid(uuid, &op->node.uuid);
+	sz = stm_data_write(data, m, c, false, uuid, sizeof(op->node.uuid));
+	if (sz <= 0)
+		return sz;
+
+	/* [LENGTH] */
+	if (op->node.do_len) {
+		u16 length = count;
+
+		sz = data->packet(data, m, c, STP_PACKET_DATA, 0, 2,
+				  (u8 *)&length);
+		if (sz <= 0)
+			return sz;
+	}
+
+	/* [TIMESTAMP] */
+	if (header & MIPI_SYST_OPT_TS) {
+		u64 ts = ktime_get_real_ns();
+
+		sz = stm_data_write(data, m, c, false, &ts, sizeof(ts));
+		if (sz <= 0)
+			return sz;
+	}
+
+	/* DATA */
+	sz = stm_data_write(data, m, c, false, buf, count);
+	if (sz > 0)
+		data->packet(data, m, c, STP_PACKET_FLAG, 0, 0, &nil);
+
+	return sz;
+}
+
+static const struct stm_protocol_driver sys_t_pdrv = {
+	.owner			= THIS_MODULE,
+	.name			= "p_sys-t",
+	.priv_sz		= sizeof(struct sys_t_policy_node),
+	.write			= sys_t_write,
+	.policy_attr		= sys_t_policy_attrs,
+	.policy_node_init	= sys_t_policy_node_init,
+	.output_open		= sys_t_output_open,
+	.output_close		= sys_t_output_close,
+};
+
+static int sys_t_stm_init(void)
+{
+	return stm_register_protocol(&sys_t_pdrv);
+}
+
+static void sys_t_stm_exit(void)
+{
+	stm_unregister_protocol(&sys_t_pdrv);
+}
+
+module_init(sys_t_stm_init);
+module_exit(sys_t_stm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MIPI SyS-T STM framing protocol driver");
+MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>");
diff --git a/drivers/hwtracing/stm/policy.c b/drivers/hwtracing/stm/policy.c
new file mode 100644
index 0000000000..42103c3a17
--- /dev/null
+++ b/drivers/hwtracing/stm/policy.c
@@ -0,0 +1,570 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * System Trace Module (STM) master/channel allocation policy management
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * A master/channel allocation policy allows mapping string identifiers to
+ * master and channel ranges, where allocation can be done.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/configfs.h>
+#include <linux/slab.h>
+#include <linux/stm.h>
+#include "stm.h"
+
+/*
+ * STP Master/Channel allocation policy configfs layout.
+ */
+
+struct stp_policy {
+	struct config_group	group;
+	struct stm_device	*stm;
+};
+
+struct stp_policy_node {
+	struct config_group	group;
+	struct stp_policy	*policy;
+	unsigned int		first_master;
+	unsigned int		last_master;
+	unsigned int		first_channel;
+	unsigned int		last_channel;
+	/* this is the one that's exposed to the attributes */
+	unsigned char		priv[];
+};
+
+void *stp_policy_node_priv(struct stp_policy_node *pn)
+{
+	if (!pn)
+		return NULL;
+
+	return pn->priv;
+}
+
+static struct configfs_subsystem stp_policy_subsys;
+
+void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
+				unsigned int *mstart, unsigned int *mend,
+				unsigned int *cstart, unsigned int *cend)
+{
+	*mstart	= policy_node->first_master;
+	*mend	= policy_node->last_master;
+	*cstart	= policy_node->first_channel;
+	*cend	= policy_node->last_channel;
+}
+
+static inline struct stp_policy *to_stp_policy(struct config_item *item)
+{
+	return item ?
+		container_of(to_config_group(item), struct stp_policy, group) :
+		NULL;
+}
+
+static inline struct stp_policy_node *
+to_stp_policy_node(struct config_item *item)
+{
+	return item ?
+		container_of(to_config_group(item), struct stp_policy_node,
+			     group) :
+		NULL;
+}
+
+void *to_pdrv_policy_node(struct config_item *item)
+{
+	struct stp_policy_node *node = to_stp_policy_node(item);
+
+	return stp_policy_node_priv(node);
+}
+EXPORT_SYMBOL_GPL(to_pdrv_policy_node);
+
+static ssize_t
+stp_policy_node_masters_show(struct config_item *item, char *page)
+{
+	struct stp_policy_node *policy_node = to_stp_policy_node(item);
+	ssize_t count;
+
+	count = sprintf(page, "%u %u\n", policy_node->first_master,
+			policy_node->last_master);
+
+	return count;
+}
+
+static ssize_t
+stp_policy_node_masters_store(struct config_item *item, const char *page,
+			      size_t count)
+{
+	struct stp_policy_node *policy_node = to_stp_policy_node(item);
+	unsigned int first, last;
+	struct stm_device *stm;
+	char *p = (char *)page;
+	ssize_t ret = -ENODEV;
+
+	if (sscanf(p, "%u %u", &first, &last) != 2)
+		return -EINVAL;
+
+	mutex_lock(&stp_policy_subsys.su_mutex);
+	stm = policy_node->policy->stm;
+	if (!stm)
+		goto unlock;
+
+	/* must be within [sw_start..sw_end], which is an inclusive range */
+	if (first > last || first < stm->data->sw_start ||
+	    last > stm->data->sw_end) {
+		ret = -ERANGE;
+		goto unlock;
+	}
+
+	ret = count;
+	policy_node->first_master = first;
+	policy_node->last_master = last;
+
+unlock:
+	mutex_unlock(&stp_policy_subsys.su_mutex);
+
+	return ret;
+}
+
+static ssize_t
+stp_policy_node_channels_show(struct config_item *item, char *page)
+{
+	struct stp_policy_node *policy_node = to_stp_policy_node(item);
+	ssize_t count;
+
+	count = sprintf(page, "%u %u\n", policy_node->first_channel,
+			policy_node->last_channel);
+
+	return count;
+}
+
+static ssize_t
+stp_policy_node_channels_store(struct config_item *item, const char *page,
+			       size_t count)
+{
+	struct stp_policy_node *policy_node = to_stp_policy_node(item);
+	unsigned int first, last;
+	struct stm_device *stm;
+	char *p = (char *)page;
+	ssize_t ret = -ENODEV;
+
+	if (sscanf(p, "%u %u", &first, &last) != 2)
+		return -EINVAL;
+
+	mutex_lock(&stp_policy_subsys.su_mutex);
+	stm = policy_node->policy->stm;
+	if (!stm)
+		goto unlock;
+
+	if (first > INT_MAX || last > INT_MAX || first > last ||
+	    last >= stm->data->sw_nchannels) {
+		ret = -ERANGE;
+		goto unlock;
+	}
+
+	ret = count;
+	policy_node->first_channel = first;
+	policy_node->last_channel = last;
+
+unlock:
+	mutex_unlock(&stp_policy_subsys.su_mutex);
+
+	return ret;
+}
+
+static void stp_policy_node_release(struct config_item *item)
+{
+	struct stp_policy_node *node = to_stp_policy_node(item);
+
+	kfree(node);
+}
+
+static struct configfs_item_operations stp_policy_node_item_ops = {
+	.release		= stp_policy_node_release,
+};
+
+CONFIGFS_ATTR(stp_policy_node_, masters);
+CONFIGFS_ATTR(stp_policy_node_, channels);
+
+static struct configfs_attribute *stp_policy_node_attrs[] = {
+	&stp_policy_node_attr_masters,
+	&stp_policy_node_attr_channels,
+	NULL,
+};
+
+static const struct config_item_type stp_policy_type;
+static const struct config_item_type stp_policy_node_type;
+
+const struct config_item_type *
+get_policy_node_type(struct configfs_attribute **attrs)
+{
+	struct config_item_type *type;
+	struct configfs_attribute **merged;
+
+	type = kmemdup(&stp_policy_node_type, sizeof(stp_policy_node_type),
+		       GFP_KERNEL);
+	if (!type)
+		return NULL;
+
+	merged = memcat_p(stp_policy_node_attrs, attrs);
+	if (!merged) {
+		kfree(type);
+		return NULL;
+	}
+
+	type->ct_attrs = merged;
+
+	return type;
+}
+
+static struct config_group *
+stp_policy_node_make(struct config_group *group, const char *name)
+{
+	const struct config_item_type *type = &stp_policy_node_type;
+	struct stp_policy_node *policy_node, *parent_node;
+	const struct stm_protocol_driver *pdrv;
+	struct stp_policy *policy;
+
+	if (group->cg_item.ci_type == &stp_policy_type) {
+		policy = container_of(group, struct stp_policy, group);
+	} else {
+		parent_node = container_of(group, struct stp_policy_node,
+					   group);
+		policy = parent_node->policy;
+	}
+
+	if (!policy->stm)
+		return ERR_PTR(-ENODEV);
+
+	pdrv = policy->stm->pdrv;
+	policy_node =
+		kzalloc(offsetof(struct stp_policy_node, priv[pdrv->priv_sz]),
+			GFP_KERNEL);
+	if (!policy_node)
+		return ERR_PTR(-ENOMEM);
+
+	if (pdrv->policy_node_init)
+		pdrv->policy_node_init((void *)policy_node->priv);
+
+	if (policy->stm->pdrv_node_type)
+		type = policy->stm->pdrv_node_type;
+
+	config_group_init_type_name(&policy_node->group, name, type);
+
+	policy_node->policy = policy;
+
+	/* default values for the attributes */
+	policy_node->first_master = policy->stm->data->sw_start;
+	policy_node->last_master = policy->stm->data->sw_end;
+	policy_node->first_channel = 0;
+	policy_node->last_channel = policy->stm->data->sw_nchannels - 1;
+
+	return &policy_node->group;
+}
+
+static void
+stp_policy_node_drop(struct config_group *group, struct config_item *item)
+{
+	config_item_put(item);
+}
+
+static struct configfs_group_operations stp_policy_node_group_ops = {
+	.make_group	= stp_policy_node_make,
+	.drop_item	= stp_policy_node_drop,
+};
+
+static const struct config_item_type stp_policy_node_type = {
+	.ct_item_ops	= &stp_policy_node_item_ops,
+	.ct_group_ops	= &stp_policy_node_group_ops,
+	.ct_attrs	= stp_policy_node_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+/*
+ * Root group: policies.
+ */
+static ssize_t stp_policy_device_show(struct config_item *item,
+				      char *page)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+	ssize_t count;
+
+	count = sprintf(page, "%s\n",
+			(policy && policy->stm) ?
+			policy->stm->data->name :
+			"<none>");
+
+	return count;
+}
+
+CONFIGFS_ATTR_RO(stp_policy_, device);
+
+static ssize_t stp_policy_protocol_show(struct config_item *item,
+					char *page)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+	ssize_t count;
+
+	count = sprintf(page, "%s\n",
+			(policy && policy->stm) ?
+			policy->stm->pdrv->name :
+			"<none>");
+
+	return count;
+}
+
+CONFIGFS_ATTR_RO(stp_policy_, protocol);
+
+static struct configfs_attribute *stp_policy_attrs[] = {
+	&stp_policy_attr_device,
+	&stp_policy_attr_protocol,
+	NULL,
+};
+
+void stp_policy_unbind(struct stp_policy *policy)
+{
+	struct stm_device *stm = policy->stm;
+
+	/*
+	 * stp_policy_release() will not call here if the policy is already
+	 * unbound; other users should not either, as no link exists between
+	 * this policy and anything else in that case
+	 */
+	if (WARN_ON_ONCE(!policy->stm))
+		return;
+
+	lockdep_assert_held(&stm->policy_mutex);
+
+	stm->policy = NULL;
+	policy->stm = NULL;
+
+	/*
+	 * Drop the reference on the protocol driver and lose the link.
+	 */
+	stm_put_protocol(stm->pdrv);
+	stm->pdrv = NULL;
+	stm_put_device(stm);
+}
+
+static void stp_policy_release(struct config_item *item)
+{
+	struct stp_policy *policy = to_stp_policy(item);
+	struct stm_device *stm = policy->stm;
+
+	/* a policy *can* be unbound and still exist in configfs tree */
+	if (!stm)
+		return;
+
+	mutex_lock(&stm->policy_mutex);
+	stp_policy_unbind(policy);
+	mutex_unlock(&stm->policy_mutex);
+
+	kfree(policy);
+}
+
+static struct configfs_item_operations stp_policy_item_ops = {
+	.release		= stp_policy_release,
+};
+
+static struct configfs_group_operations stp_policy_group_ops = {
+	.make_group	= stp_policy_node_make,
+};
+
+static const struct config_item_type stp_policy_type = {
+	.ct_item_ops	= &stp_policy_item_ops,
+	.ct_group_ops	= &stp_policy_group_ops,
+	.ct_attrs	= stp_policy_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct config_group *
+stp_policy_make(struct config_group *group, const char *name)
+{
+	const struct config_item_type *pdrv_node_type;
+	const struct stm_protocol_driver *pdrv;
+	char *devname, *proto, *p;
+	struct config_group *ret;
+	struct stm_device *stm;
+	int err;
+
+	devname = kasprintf(GFP_KERNEL, "%s", name);
+	if (!devname)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * node must look like <device_name>.<policy_name>, where
+	 * <device_name> is the name of an existing stm device; may
+	 *               contain dots;
+	 * <policy_name> is an arbitrary string; may not contain dots
+	 * <device_name>:<protocol_name>.<policy_name>
+	 */
+	p = strrchr(devname, '.');
+	if (!p) {
+		kfree(devname);
+		return ERR_PTR(-EINVAL);
+	}
+
+	*p = '\0';
+
+	/*
+	 * look for ":<protocol_name>":
+	 *  + no protocol suffix: fall back to whatever is available;
+	 *  + unknown protocol: fail the whole thing
+	 */
+	proto = strrchr(devname, ':');
+	if (proto)
+		*proto++ = '\0';
+
+	stm = stm_find_device(devname);
+	if (!stm) {
+		kfree(devname);
+		return ERR_PTR(-ENODEV);
+	}
+
+	err = stm_lookup_protocol(proto, &pdrv, &pdrv_node_type);
+	kfree(devname);
+
+	if (err) {
+		stm_put_device(stm);
+		return ERR_PTR(-ENODEV);
+	}
+
+	mutex_lock(&stm->policy_mutex);
+	if (stm->policy) {
+		ret = ERR_PTR(-EBUSY);
+		goto unlock_policy;
+	}
+
+	stm->policy = kzalloc(sizeof(*stm->policy), GFP_KERNEL);
+	if (!stm->policy) {
+		ret = ERR_PTR(-ENOMEM);
+		goto unlock_policy;
+	}
+
+	config_group_init_type_name(&stm->policy->group, name,
+				    &stp_policy_type);
+
+	stm->pdrv = pdrv;
+	stm->pdrv_node_type = pdrv_node_type;
+	stm->policy->stm = stm;
+	ret = &stm->policy->group;
+
+unlock_policy:
+	mutex_unlock(&stm->policy_mutex);
+
+	if (IS_ERR(ret)) {
+		/*
+		 * pdrv and stm->pdrv at this point can be quite different,
+		 * and only one of them needs to be 'put'
+		 */
+		stm_put_protocol(pdrv);
+		stm_put_device(stm);
+	}
+
+	return ret;
+}
+
+static struct configfs_group_operations stp_policy_root_group_ops = {
+	.make_group	= stp_policy_make,
+};
+
+static const struct config_item_type stp_policy_root_type = {
+	.ct_group_ops	= &stp_policy_root_group_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem stp_policy_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf	= "stp-policy",
+			.ci_type	= &stp_policy_root_type,
+		},
+	},
+};
+
+/*
+ * Lock the policy mutex from the outside
+ */
+static struct stp_policy_node *
+__stp_policy_node_lookup(struct stp_policy *policy, char *s)
+{
+	struct stp_policy_node *policy_node, *ret = NULL;
+	struct list_head *head = &policy->group.cg_children;
+	struct config_item *item;
+	char *start, *end = s;
+
+	if (list_empty(head))
+		return NULL;
+
+next:
+	for (;;) {
+		start = strsep(&end, "/");
+		if (!start)
+			break;
+
+		if (!*start)
+			continue;
+
+		list_for_each_entry(item, head, ci_entry) {
+			policy_node = to_stp_policy_node(item);
+
+			if (!strcmp(start,
+				    policy_node->group.cg_item.ci_name)) {
+				ret = policy_node;
+
+				if (!end)
+					goto out;
+
+				head = &policy_node->group.cg_children;
+				goto next;
+			}
+		}
+		break;
+	}
+
+out:
+	return ret;
+}
+
+
+struct stp_policy_node *
+stp_policy_node_lookup(struct stm_device *stm, char *s)
+{
+	struct stp_policy_node *policy_node = NULL;
+
+	mutex_lock(&stp_policy_subsys.su_mutex);
+
+	mutex_lock(&stm->policy_mutex);
+	if (stm->policy)
+		policy_node = __stp_policy_node_lookup(stm->policy, s);
+	mutex_unlock(&stm->policy_mutex);
+
+	if (policy_node)
+		config_item_get(&policy_node->group.cg_item);
+	else
+		mutex_unlock(&stp_policy_subsys.su_mutex);
+
+	return policy_node;
+}
+
+void stp_policy_node_put(struct stp_policy_node *policy_node)
+{
+	lockdep_assert_held(&stp_policy_subsys.su_mutex);
+
+	mutex_unlock(&stp_policy_subsys.su_mutex);
+	config_item_put(&policy_node->group.cg_item);
+}
+
+int __init stp_configfs_init(void)
+{
+	config_group_init(&stp_policy_subsys.su_group);
+	mutex_init(&stp_policy_subsys.su_mutex);
+	return configfs_register_subsystem(&stp_policy_subsys);
+}
+
+void __exit stp_configfs_exit(void)
+{
+	configfs_unregister_subsystem(&stp_policy_subsys);
+}
diff --git a/drivers/hwtracing/stm/stm.h b/drivers/hwtracing/stm/stm.h
new file mode 100644
index 0000000000..a9be49fc7a
--- /dev/null
+++ b/drivers/hwtracing/stm/stm.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * System Trace Module (STM) infrastructure
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * STM class implements generic infrastructure for  System Trace Module devices
+ * as defined in MIPI STPv2 specification.
+ */
+
+#ifndef _STM_STM_H_
+#define _STM_STM_H_
+
+#include <linux/configfs.h>
+
+struct stp_policy;
+struct stp_policy_node;
+struct stm_protocol_driver;
+
+int stp_configfs_init(void);
+void stp_configfs_exit(void);
+
+void *stp_policy_node_priv(struct stp_policy_node *pn);
+
+struct stp_master {
+	unsigned int	nr_free;
+	unsigned long	chan_map[];
+};
+
+struct stm_device {
+	struct device		dev;
+	struct module		*owner;
+	struct stp_policy	*policy;
+	struct mutex		policy_mutex;
+	int			major;
+	unsigned int		sw_nmasters;
+	struct stm_data		*data;
+	struct mutex		link_mutex;
+	spinlock_t		link_lock;
+	struct list_head	link_list;
+	/* framing protocol in use */
+	const struct stm_protocol_driver	*pdrv;
+	const struct config_item_type		*pdrv_node_type;
+	/* master allocation */
+	spinlock_t		mc_lock;
+	struct stp_master	*masters[];
+};
+
+#define to_stm_device(_d)				\
+	container_of((_d), struct stm_device, dev)
+
+struct stp_policy_node *
+stp_policy_node_lookup(struct stm_device *stm, char *s);
+void stp_policy_node_put(struct stp_policy_node *policy_node);
+void stp_policy_unbind(struct stp_policy *policy);
+
+void stp_policy_node_get_ranges(struct stp_policy_node *policy_node,
+				unsigned int *mstart, unsigned int *mend,
+				unsigned int *cstart, unsigned int *cend);
+
+const struct config_item_type *
+get_policy_node_type(struct configfs_attribute **attrs);
+
+struct stm_output {
+	spinlock_t		lock;
+	unsigned int		master;
+	unsigned int		channel;
+	unsigned int		nr_chans;
+	void			*pdrv_private;
+};
+
+struct stm_file {
+	struct stm_device	*stm;
+	struct stm_output	output;
+};
+
+struct stm_device *stm_find_device(const char *name);
+void stm_put_device(struct stm_device *stm);
+
+struct stm_source_device {
+	struct device		dev;
+	struct stm_source_data	*data;
+	spinlock_t		link_lock;
+	struct stm_device __rcu	*link;
+	struct list_head	link_entry;
+	/* one output per stm_source device */
+	struct stm_output	output;
+};
+
+#define to_stm_source_device(_d)				\
+	container_of((_d), struct stm_source_device, dev)
+
+void *to_pdrv_policy_node(struct config_item *item);
+
+struct stm_protocol_driver {
+	struct module	*owner;
+	const char	*name;
+	ssize_t		(*write)(struct stm_data *data,
+				 struct stm_output *output, unsigned int chan,
+				 const char *buf, size_t count);
+	void		(*policy_node_init)(void *arg);
+	int		(*output_open)(void *priv, struct stm_output *output);
+	void		(*output_close)(struct stm_output *output);
+	ssize_t		priv_sz;
+	struct configfs_attribute	**policy_attr;
+};
+
+int stm_register_protocol(const struct stm_protocol_driver *pdrv);
+void stm_unregister_protocol(const struct stm_protocol_driver *pdrv);
+int stm_lookup_protocol(const char *name,
+			const struct stm_protocol_driver **pdrv,
+			const struct config_item_type **type);
+void stm_put_protocol(const struct stm_protocol_driver *pdrv);
+ssize_t stm_data_write(struct stm_data *data, unsigned int m,
+		       unsigned int c, bool ts_first, const void *buf,
+		       size_t count);
+
+#endif /* _STM_STM_H_ */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/hwtracing
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip