Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/perf
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
50 files changed, 35103 insertions, 0 deletions
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
new file mode 100644
index 0000000000..273d67ecf6
--- /dev/null
+++ b/drivers/perf/Kconfig
@@ -0,0 +1,237 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Performance Monitor Drivers
+#
+
+menu "Performance monitor support"
+	depends on PERF_EVENTS
+
+config ARM_CCI_PMU
+	tristate "ARM CCI PMU driver"
+	depends on (ARM && CPU_V7) || ARM64
+	select ARM_CCI
+	help
+	  Support for PMU events monitoring on the ARM CCI (Cache Coherent
+	  Interconnect) family of products.
+
+	  If compiled as a module, it will be called arm-cci.
+
+config ARM_CCI400_PMU
+	bool "support CCI-400"
+	default y
+	depends on ARM_CCI_PMU
+	select ARM_CCI400_COMMON
+	help
+	  CCI-400 provides 4 independent event counters counting events related
+	  to the connected slave/master interfaces, plus a cycle counter.
+
+config ARM_CCI5xx_PMU
+	bool "support CCI-500/CCI-550"
+	default y
+	depends on ARM_CCI_PMU
+	help
+	  CCI-500/CCI-550 both provide 8 independent event counters, which can
+	  count events pertaining to the slave/master interfaces as well as the
+	  internal events to the CCI.
+
+config ARM_CCN
+	tristate "ARM CCN driver support"
+	depends on ARM || ARM64 || COMPILE_TEST
+	help
+	  PMU (perf) driver supporting the ARM CCN (Cache Coherent Network)
+	  interconnect.
+
+config ARM_CMN
+	tristate "Arm CMN-600 PMU support"
+	depends on ARM64 || COMPILE_TEST
+	help
+	  Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh
+	  Network interconnect.
+
+config ARM_PMU
+	depends on ARM || ARM64
+	bool "ARM PMU framework"
+	default y
+	help
+	  Say y if you want to use CPU performance monitors on ARM-based
+	  systems.
+
+config RISCV_PMU
+	depends on RISCV
+	bool "RISC-V PMU framework"
+	default y
+	help
+	  Say y if you want to use CPU performance monitors on RISCV-based
+	  systems. This provides the core PMU framework that abstracts common
+	  PMU functionalities in a core library so that different PMU drivers
+	  can reuse it.
+
+config RISCV_PMU_LEGACY
+	depends on RISCV_PMU
+	bool "RISC-V legacy PMU implementation"
+	default y
+	help
+	  Say y if you want to use the legacy CPU performance monitor
+	  implementation on RISC-V based systems. This only allows counting
+	  of cycle/instruction counter and doesn't support counter overflow,
+	  or programmable counters. It will be removed in future.
+
+config RISCV_PMU_SBI
+	depends on RISCV_PMU && RISCV_SBI
+	bool "RISC-V PMU based on SBI PMU extension"
+	default y
+	help
+	  Say y if you want to use the CPU performance monitor
+	  using SBI PMU extension on RISC-V based systems. This option provides
+	  full perf feature support i.e. counter overflow, privilege mode
+	  filtering, counter configuration.
+
+config ARM_PMU_ACPI
+	depends on ARM_PMU && ACPI
+	def_bool y
+
+config ARM_SMMU_V3_PMU
+	 tristate "ARM SMMUv3 Performance Monitors Extension"
+	 depends on ARM64 || (COMPILE_TEST && 64BIT)
+	 depends on GENERIC_MSI_IRQ
+	   help
+	   Provides support for the ARM SMMUv3 Performance Monitor Counter
+	   Groups (PMCG), which provide monitoring of transactions passing
+	   through the SMMU and allow the resulting information to be filtered
+	   based on the Stream ID of the corresponding master.
+
+config ARM_PMUV3
+	depends on HW_PERF_EVENTS && ((ARM && CPU_V7) || ARM64)
+	bool "ARM PMUv3 support" if !ARM64
+	default ARM64
+	  help
+	  Say y if you want to use the ARM performance monitor unit (PMU)
+	  version 3. The PMUv3 is the CPU performance monitors on ARMv8
+	  (aarch32 and aarch64) systems that implement the PMUv3
+	  architecture.
+
+config ARM_DSU_PMU
+	tristate "ARM DynamIQ Shared Unit (DSU) PMU"
+	depends on ARM64
+	  help
+	  Provides support for performance monitor unit in ARM DynamIQ Shared
+	  Unit (DSU). The DSU integrates one or more cores with an L3 memory
+	  system, control logic. The PMU allows counting various events related
+	  to DSU.
+
+config FSL_IMX8_DDR_PMU
+	tristate "Freescale i.MX8 DDR perf monitor"
+	depends on ARCH_MXC || COMPILE_TEST
+	  help
+	  Provides support for the DDR performance monitor in i.MX8, which
+	  can give information about memory throughput and other related
+	  events.
+
+config FSL_IMX9_DDR_PMU
+	tristate "Freescale i.MX9 DDR perf monitor"
+	depends on ARCH_MXC
+	 help
+	 Provides support for the DDR performance monitor in i.MX9, which
+	 can give information about memory throughput and other related
+	 events.
+
+config QCOM_L2_PMU
+	bool "Qualcomm Technologies L2-cache PMU"
+	depends on ARCH_QCOM && ARM64 && ACPI
+	select QCOM_KRYO_L2_ACCESSORS
+	  help
+	  Provides support for the L2 cache performance monitor unit (PMU)
+	  in Qualcomm Technologies processors.
+	  Adds the L2 cache PMU into the perf events subsystem for
+	  monitoring L2 cache events.
+
+config QCOM_L3_PMU
+	bool "Qualcomm Technologies L3-cache PMU"
+	depends on ARCH_QCOM && ARM64 && ACPI
+	select QCOM_IRQ_COMBINER
+	help
+	   Provides support for the L3 cache performance monitor unit (PMU)
+	   in Qualcomm Technologies processors.
+	   Adds the L3 cache PMU into the perf events subsystem for
+	   monitoring L3 cache events.
+
+config THUNDERX2_PMU
+	tristate "Cavium ThunderX2 SoC PMU UNCORE"
+	depends on ARCH_THUNDER2 || COMPILE_TEST
+	depends on NUMA && ACPI
+	default m
+	help
+	   Provides support for ThunderX2 UNCORE events.
+	   The SoC has PMU support in its L3 cache controller (L3C) and
+	   in the DDR4 Memory Controller (DMC).
+
+config XGENE_PMU
+        depends on ARCH_XGENE || (COMPILE_TEST && 64BIT)
+        bool "APM X-Gene SoC PMU"
+        default n
+        help
+          Say y if you want to use APM X-Gene SoC performance monitors.
+
+config ARM_SPE_PMU
+	tristate "Enable support for the ARMv8.2 Statistical Profiling Extension"
+	depends on ARM64
+	help
+	  Enable perf support for the ARMv8.2 Statistical Profiling
+	  Extension, which provides periodic sampling of operations in
+	  the CPU pipeline and reports this via the perf AUX interface.
+
+config ARM_DMC620_PMU
+	tristate "Enable PMU support for the ARM DMC-620 memory controller"
+	depends on (ARM64 && ACPI) || COMPILE_TEST
+	help
+	  Support for PMU events monitoring on the ARM DMC-620 memory
+	  controller.
+
+config MARVELL_CN10K_TAD_PMU
+	tristate "Marvell CN10K LLC-TAD PMU"
+	depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
+	help
+	  Provides support for Last-Level cache Tag-and-data Units (LLC-TAD)
+	  performance monitors on CN10K family silicons.
+
+config APPLE_M1_CPU_PMU
+	bool "Apple M1 CPU PMU support"
+	depends on ARM_PMU && ARCH_APPLE
+	help
+	  Provides support for the non-architectural CPU PMUs present on
+	  the Apple M1 SoCs and derivatives.
+
+config ALIBABA_UNCORE_DRW_PMU
+	tristate "Alibaba T-Head Yitian 710 DDR Sub-system Driveway PMU driver"
+	depends on (ARM64 && ACPI) || COMPILE_TEST
+	help
+	  Support for Driveway PMU events monitoring on Yitian 710 DDR
+	  Sub-system.
+
+source "drivers/perf/hisilicon/Kconfig"
+
+config MARVELL_CN10K_DDR_PMU
+	tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support"
+	depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
+	help
+	  Enable perf support for Marvell DDR Performance monitoring
+	  event on CN10K platform.
+
+source "drivers/perf/arm_cspmu/Kconfig"
+
+source "drivers/perf/amlogic/Kconfig"
+
+config CXL_PMU
+	tristate "CXL Performance Monitoring Unit"
+	depends on CXL_BUS
+	help
+	  Support performance monitoring as defined in CXL rev 3.0
+	  section 13.2: Performance Monitoring. CXL components may have
+	  one or more CXL Performance Monitoring Units (CPMUs).
+
+	  Say 'y/m' to enable a driver that will attach to performance
+	  monitoring units and provide standard perf based interfaces.
+
+	  If unsure say 'm'.
+
+endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
new file mode 100644
index 0000000000..16b3ec4db9
--- /dev/null
+++ b/drivers/perf/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o
+obj-$(CONFIG_ARM_CCN) += arm-ccn.o
+obj-$(CONFIG_ARM_CMN) += arm-cmn.o
+obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
+obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
+obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
+obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o
+obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
+obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
+obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o
+obj-$(CONFIG_HISI_PMU) += hisilicon/
+obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
+obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
+obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
+obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o
+obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
+obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
+obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
+obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
+obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
+obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
+obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
+obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
+obj-$(CONFIG_CXL_PMU) += cxl_pmu.o
diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
new file mode 100644
index 0000000000..19d459a36b
--- /dev/null
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -0,0 +1,836 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Alibaba DDR Sub-System Driveway PMU driver
+ *
+ * Copyright (C) 2022 Alibaba Inc
+ */
+
+#define ALI_DRW_PMUNAME		"ali_drw"
+#define ALI_DRW_DRVNAME		ALI_DRW_PMUNAME "_pmu"
+#define pr_fmt(fmt)		ALI_DRW_DRVNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/refcount.h>
+
+
+#define ALI_DRW_PMU_COMMON_MAX_COUNTERS			16
+#define ALI_DRW_PMU_TEST_SEL_COMMON_COUNTER_BASE	19
+
+#define ALI_DRW_PMU_PA_SHIFT			12
+#define ALI_DRW_PMU_CNT_INIT			0x00000000
+#define ALI_DRW_CNT_MAX_PERIOD			0xffffffff
+#define ALI_DRW_PMU_CYCLE_EVT_ID		0x80
+
+#define ALI_DRW_PMU_CNT_CTRL			0xC00
+#define ALI_DRW_PMU_CNT_RST			BIT(2)
+#define ALI_DRW_PMU_CNT_STOP			BIT(1)
+#define ALI_DRW_PMU_CNT_START			BIT(0)
+
+#define ALI_DRW_PMU_CNT_STATE			0xC04
+#define ALI_DRW_PMU_TEST_CTRL			0xC08
+#define ALI_DRW_PMU_CNT_PRELOAD			0xC0C
+
+#define ALI_DRW_PMU_CYCLE_CNT_HIGH_MASK		GENMASK(23, 0)
+#define ALI_DRW_PMU_CYCLE_CNT_LOW_MASK		GENMASK(31, 0)
+#define ALI_DRW_PMU_CYCLE_CNT_HIGH		0xC10
+#define ALI_DRW_PMU_CYCLE_CNT_LOW		0xC14
+
+/* PMU EVENT SEL 0-3 are paired in 32-bit registers on a 4-byte stride */
+#define ALI_DRW_PMU_EVENT_SEL0			0xC68
+/* counter 0-3 use sel0, counter 4-7 use sel1...*/
+#define ALI_DRW_PMU_EVENT_SELn(n) \
+	(ALI_DRW_PMU_EVENT_SEL0 + (n / 4) * 0x4)
+#define ALI_DRW_PMCOM_CNT_EN			BIT(7)
+#define ALI_DRW_PMCOM_CNT_EVENT_MASK		GENMASK(5, 0)
+#define ALI_DRW_PMCOM_CNT_EVENT_OFFSET(n) \
+	(8 * (n % 4))
+
+/* PMU COMMON COUNTER 0-15, are paired in 32-bit registers on a 4-byte stride */
+#define ALI_DRW_PMU_COMMON_COUNTER0		0xC78
+#define ALI_DRW_PMU_COMMON_COUNTERn(n) \
+	(ALI_DRW_PMU_COMMON_COUNTER0 + 0x4 * (n))
+
+#define ALI_DRW_PMU_OV_INTR_ENABLE_CTL		0xCB8
+#define ALI_DRW_PMU_OV_INTR_DISABLE_CTL		0xCBC
+#define ALI_DRW_PMU_OV_INTR_ENABLE_STATUS	0xCC0
+#define ALI_DRW_PMU_OV_INTR_CLR			0xCC4
+#define ALI_DRW_PMU_OV_INTR_STATUS		0xCC8
+#define ALI_DRW_PMCOM_CNT_OV_INTR_MASK		GENMASK(23, 8)
+#define ALI_DRW_PMBW_CNT_OV_INTR_MASK		GENMASK(7, 0)
+#define ALI_DRW_PMU_OV_INTR_MASK		GENMASK_ULL(63, 0)
+
+static int ali_drw_cpuhp_state_num;
+
+static LIST_HEAD(ali_drw_pmu_irqs);
+static DEFINE_MUTEX(ali_drw_pmu_irqs_lock);
+
+struct ali_drw_pmu_irq {
+	struct hlist_node node;
+	struct list_head irqs_node;
+	struct list_head pmus_node;
+	int irq_num;
+	int cpu;
+	refcount_t refcount;
+};
+
+struct ali_drw_pmu {
+	void __iomem *cfg_base;
+	struct device *dev;
+
+	struct list_head pmus_node;
+	struct ali_drw_pmu_irq *irq;
+	int irq_num;
+	int cpu;
+	DECLARE_BITMAP(used_mask, ALI_DRW_PMU_COMMON_MAX_COUNTERS);
+	struct perf_event *events[ALI_DRW_PMU_COMMON_MAX_COUNTERS];
+	int evtids[ALI_DRW_PMU_COMMON_MAX_COUNTERS];
+
+	struct pmu pmu;
+};
+
+#define to_ali_drw_pmu(p) (container_of(p, struct ali_drw_pmu, pmu))
+
+#define DRW_CONFIG_EVENTID		GENMASK(7, 0)
+#define GET_DRW_EVENTID(event)	FIELD_GET(DRW_CONFIG_EVENTID, (event)->attr.config)
+
+static ssize_t ali_drw_pmu_format_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sprintf(buf, "%s\n", (char *)eattr->var);
+}
+
+/*
+ * PMU event attributes
+ */
+static ssize_t ali_drw_pmu_event_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var);
+}
+
+#define ALI_DRW_PMU_ATTR(_name, _func, _config)                            \
+		(&((struct dev_ext_attribute[]) {                               \
+				{ __ATTR(_name, 0444, _func, NULL), (void *)_config }   \
+		})[0].attr.attr)
+
+#define ALI_DRW_PMU_FORMAT_ATTR(_name, _config)            \
+	ALI_DRW_PMU_ATTR(_name, ali_drw_pmu_format_show, (void *)_config)
+#define ALI_DRW_PMU_EVENT_ATTR(_name, _config)             \
+	ALI_DRW_PMU_ATTR(_name, ali_drw_pmu_event_show, (unsigned long)_config)
+
+static struct attribute *ali_drw_pmu_events_attrs[] = {
+	ALI_DRW_PMU_EVENT_ATTR(hif_rd_or_wr,			0x0),
+	ALI_DRW_PMU_EVENT_ATTR(hif_wr,				0x1),
+	ALI_DRW_PMU_EVENT_ATTR(hif_rd,				0x2),
+	ALI_DRW_PMU_EVENT_ATTR(hif_rmw,				0x3),
+	ALI_DRW_PMU_EVENT_ATTR(hif_hi_pri_rd,			0x4),
+	ALI_DRW_PMU_EVENT_ATTR(dfi_wr_data_cycles,		0x7),
+	ALI_DRW_PMU_EVENT_ATTR(dfi_rd_data_cycles,		0x8),
+	ALI_DRW_PMU_EVENT_ATTR(hpr_xact_when_critical,		0x9),
+	ALI_DRW_PMU_EVENT_ATTR(lpr_xact_when_critical,		0xA),
+	ALI_DRW_PMU_EVENT_ATTR(wr_xact_when_critical,		0xB),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_activate,			0xC),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_rd_or_wr,			0xD),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_rd_activate,		0xE),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_rd,			0xF),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_wr,			0x10),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_mwr,			0x11),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_precharge,			0x12),
+	ALI_DRW_PMU_EVENT_ATTR(precharge_for_rdwr,		0x13),
+	ALI_DRW_PMU_EVENT_ATTR(precharge_for_other,		0x14),
+	ALI_DRW_PMU_EVENT_ATTR(rdwr_transitions,		0x15),
+	ALI_DRW_PMU_EVENT_ATTR(write_combine,			0x16),
+	ALI_DRW_PMU_EVENT_ATTR(war_hazard,			0x17),
+	ALI_DRW_PMU_EVENT_ATTR(raw_hazard,			0x18),
+	ALI_DRW_PMU_EVENT_ATTR(waw_hazard,			0x19),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk0,		0x1A),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk1,		0x1B),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk2,		0x1C),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk3,		0x1D),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk0,	0x1E),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk1,	0x1F),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk2,	0x20),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk3,	0x21),
+	ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk0,		0x26),
+	ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk1,		0x27),
+	ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk2,		0x28),
+	ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk3,		0x29),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_refresh,			0x2A),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_crit_ref,			0x2B),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_load_mode,			0x2D),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_zqcl,			0x2E),
+	ALI_DRW_PMU_EVENT_ATTR(visible_window_limit_reached_rd, 0x30),
+	ALI_DRW_PMU_EVENT_ATTR(visible_window_limit_reached_wr, 0x31),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_dqsosc_mpc,		0x34),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_dqsosc_mrr,		0x35),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_tcr_mrr,			0x36),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_zqstart,			0x37),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_zqlatch,			0x38),
+	ALI_DRW_PMU_EVENT_ATTR(chi_txreq,			0x39),
+	ALI_DRW_PMU_EVENT_ATTR(chi_txdat,			0x3A),
+	ALI_DRW_PMU_EVENT_ATTR(chi_rxdat,			0x3B),
+	ALI_DRW_PMU_EVENT_ATTR(chi_rxrsp,			0x3C),
+	ALI_DRW_PMU_EVENT_ATTR(tsz_vio,				0x3D),
+	ALI_DRW_PMU_EVENT_ATTR(cycle,				0x80),
+	NULL,
+};
+
+static struct attribute_group ali_drw_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = ali_drw_pmu_events_attrs,
+};
+
+static struct attribute *ali_drw_pmu_format_attr[] = {
+	ALI_DRW_PMU_FORMAT_ATTR(event, "config:0-7"),
+	NULL,
+};
+
+static const struct attribute_group ali_drw_pmu_format_group = {
+	.name = "format",
+	.attrs = ali_drw_pmu_format_attr,
+};
+
+static ssize_t ali_drw_pmu_cpumask_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(drw_pmu->cpu));
+}
+
+static struct device_attribute ali_drw_pmu_cpumask_attr =
+		__ATTR(cpumask, 0444, ali_drw_pmu_cpumask_show, NULL);
+
+static struct attribute *ali_drw_pmu_cpumask_attrs[] = {
+	&ali_drw_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ali_drw_pmu_cpumask_attr_group = {
+	.attrs = ali_drw_pmu_cpumask_attrs,
+};
+
+static ssize_t ali_drw_pmu_identifier_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	return sysfs_emit(page, "%s\n", "ali_drw_pmu");
+}
+
+static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj,
+						struct attribute *attr, int n)
+{
+	return attr->mode;
+}
+
+static struct device_attribute ali_drw_pmu_identifier_attr =
+	__ATTR(identifier, 0444, ali_drw_pmu_identifier_show, NULL);
+
+static struct attribute *ali_drw_pmu_identifier_attrs[] = {
+	&ali_drw_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group ali_drw_pmu_identifier_attr_group = {
+	.attrs = ali_drw_pmu_identifier_attrs,
+	.is_visible = ali_drw_pmu_identifier_attr_visible
+};
+
+static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
+	&ali_drw_pmu_events_attr_group,
+	&ali_drw_pmu_cpumask_attr_group,
+	&ali_drw_pmu_format_group,
+	&ali_drw_pmu_identifier_attr_group,
+	NULL,
+};
+
+/* find a counter for event, then in add func, hw.idx will equal to counter */
+static int ali_drw_get_counter_idx(struct perf_event *event)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	int idx;
+
+	for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; ++idx) {
+		if (!test_and_set_bit(idx, drw_pmu->used_mask))
+			return idx;
+	}
+
+	/* The counters are all in use. */
+	return -EBUSY;
+}
+
+static u64 ali_drw_pmu_read_counter(struct perf_event *event)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	u64 cycle_high, cycle_low;
+
+	if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) {
+		cycle_high = readl(drw_pmu->cfg_base + ALI_DRW_PMU_CYCLE_CNT_HIGH);
+		cycle_high &= ALI_DRW_PMU_CYCLE_CNT_HIGH_MASK;
+		cycle_low = readl(drw_pmu->cfg_base + ALI_DRW_PMU_CYCLE_CNT_LOW);
+		cycle_low &= ALI_DRW_PMU_CYCLE_CNT_LOW_MASK;
+		return (cycle_high << 32 | cycle_low);
+	}
+
+	return readl(drw_pmu->cfg_base +
+		     ALI_DRW_PMU_COMMON_COUNTERn(event->hw.idx));
+}
+
+static void ali_drw_pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev, now;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = ali_drw_pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	/* handle overflow. */
+	delta = now - prev;
+	if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID)
+		delta &= ALI_DRW_PMU_OV_INTR_MASK;
+	else
+		delta &= ALI_DRW_CNT_MAX_PERIOD;
+	local64_add(delta, &event->count);
+}
+
+static void ali_drw_pmu_event_set_period(struct perf_event *event)
+{
+	u64 pre_val;
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+
+	/* set a preload counter for test purpose */
+	writel(ALI_DRW_PMU_TEST_SEL_COMMON_COUNTER_BASE + event->hw.idx,
+	       drw_pmu->cfg_base + ALI_DRW_PMU_TEST_CTRL);
+
+	/* set conunter initial value */
+	pre_val = ALI_DRW_PMU_CNT_INIT;
+	writel(pre_val, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_PRELOAD);
+	local64_set(&event->hw.prev_count, pre_val);
+
+	/* set sel mode to zero to start test */
+	writel(0x0, drw_pmu->cfg_base + ALI_DRW_PMU_TEST_CTRL);
+}
+
+static void ali_drw_pmu_enable_counter(struct perf_event *event)
+{
+	u32 val, subval, reg, shift;
+	int counter = event->hw.idx;
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+
+	reg = ALI_DRW_PMU_EVENT_SELn(counter);
+	val = readl(drw_pmu->cfg_base + reg);
+	subval = FIELD_PREP(ALI_DRW_PMCOM_CNT_EN, 1) |
+		 FIELD_PREP(ALI_DRW_PMCOM_CNT_EVENT_MASK, drw_pmu->evtids[counter]);
+
+	shift = ALI_DRW_PMCOM_CNT_EVENT_OFFSET(counter);
+	val &= ~(GENMASK(7, 0) << shift);
+	val |= subval << shift;
+
+	writel(val, drw_pmu->cfg_base + reg);
+}
+
+static void ali_drw_pmu_disable_counter(struct perf_event *event)
+{
+	u32 val, reg, subval, shift;
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	int counter = event->hw.idx;
+
+	reg = ALI_DRW_PMU_EVENT_SELn(counter);
+	val = readl(drw_pmu->cfg_base + reg);
+	subval = FIELD_PREP(ALI_DRW_PMCOM_CNT_EN, 0) |
+		 FIELD_PREP(ALI_DRW_PMCOM_CNT_EVENT_MASK, 0);
+
+	shift = ALI_DRW_PMCOM_CNT_EVENT_OFFSET(counter);
+	val &= ~(GENMASK(7, 0) << shift);
+	val |= subval << shift;
+
+	writel(val, drw_pmu->cfg_base + reg);
+}
+
+static irqreturn_t ali_drw_pmu_isr(int irq_num, void *data)
+{
+	struct ali_drw_pmu_irq *irq = data;
+	struct ali_drw_pmu *drw_pmu;
+	irqreturn_t ret = IRQ_NONE;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(drw_pmu, &irq->pmus_node, pmus_node) {
+		unsigned long status, clr_status;
+		struct perf_event *event;
+		unsigned int idx;
+
+		for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; idx++) {
+			event = drw_pmu->events[idx];
+			if (!event)
+				continue;
+			ali_drw_pmu_disable_counter(event);
+		}
+
+		/* common counter intr status */
+		status = readl(drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_STATUS);
+		status = FIELD_GET(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, status);
+		if (status) {
+			for_each_set_bit(idx, &status,
+					 ALI_DRW_PMU_COMMON_MAX_COUNTERS) {
+				event = drw_pmu->events[idx];
+				if (WARN_ON_ONCE(!event))
+					continue;
+				ali_drw_pmu_event_update(event);
+				ali_drw_pmu_event_set_period(event);
+			}
+
+			/* clear common counter intr status */
+			clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, 1);
+			writel(clr_status,
+			       drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR);
+		}
+
+		for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; idx++) {
+			event = drw_pmu->events[idx];
+			if (!event)
+				continue;
+			if (!(event->hw.state & PERF_HES_STOPPED))
+				ali_drw_pmu_enable_counter(event);
+		}
+		if (status)
+			ret = IRQ_HANDLED;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+static struct ali_drw_pmu_irq *__ali_drw_pmu_init_irq(struct platform_device
+						      *pdev, int irq_num)
+{
+	int ret;
+	struct ali_drw_pmu_irq *irq;
+
+	list_for_each_entry(irq, &ali_drw_pmu_irqs, irqs_node) {
+		if (irq->irq_num == irq_num
+		    && refcount_inc_not_zero(&irq->refcount))
+			return irq;
+	}
+
+	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+	if (!irq)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&irq->pmus_node);
+
+	/* Pick one CPU to be the preferred one to use */
+	irq->cpu = smp_processor_id();
+	refcount_set(&irq->refcount, 1);
+
+	/*
+	 * FIXME: one of DDRSS Driveway PMU overflow interrupt shares the same
+	 * irq number with MPAM ERR_IRQ. To register DDRSS PMU and MPAM drivers
+	 * successfully, add IRQF_SHARED flag. Howerer, PMU interrupt should not
+	 * share with other component.
+	 */
+	ret = devm_request_irq(&pdev->dev, irq_num, ali_drw_pmu_isr,
+			       IRQF_SHARED, dev_name(&pdev->dev), irq);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Fail to request IRQ:%d ret:%d\n", irq_num, ret);
+		goto out_free;
+	}
+
+	ret = irq_set_affinity_hint(irq_num, cpumask_of(irq->cpu));
+	if (ret)
+		goto out_free;
+
+	ret = cpuhp_state_add_instance_nocalls(ali_drw_cpuhp_state_num,
+					     &irq->node);
+	if (ret)
+		goto out_free;
+
+	irq->irq_num = irq_num;
+	list_add(&irq->irqs_node, &ali_drw_pmu_irqs);
+
+	return irq;
+
+out_free:
+	kfree(irq);
+	return ERR_PTR(ret);
+}
+
+static int ali_drw_pmu_init_irq(struct ali_drw_pmu *drw_pmu,
+				struct platform_device *pdev)
+{
+	int irq_num;
+	struct ali_drw_pmu_irq *irq;
+
+	/* Read and init IRQ */
+	irq_num = platform_get_irq(pdev, 0);
+	if (irq_num < 0)
+		return irq_num;
+
+	mutex_lock(&ali_drw_pmu_irqs_lock);
+	irq = __ali_drw_pmu_init_irq(pdev, irq_num);
+	mutex_unlock(&ali_drw_pmu_irqs_lock);
+
+	if (IS_ERR(irq))
+		return PTR_ERR(irq);
+
+	drw_pmu->irq = irq;
+
+	mutex_lock(&ali_drw_pmu_irqs_lock);
+	list_add_rcu(&drw_pmu->pmus_node, &irq->pmus_node);
+	mutex_unlock(&ali_drw_pmu_irqs_lock);
+
+	return 0;
+}
+
+static void ali_drw_pmu_uninit_irq(struct ali_drw_pmu *drw_pmu)
+{
+	struct ali_drw_pmu_irq *irq = drw_pmu->irq;
+
+	mutex_lock(&ali_drw_pmu_irqs_lock);
+	list_del_rcu(&drw_pmu->pmus_node);
+
+	if (!refcount_dec_and_test(&irq->refcount)) {
+		mutex_unlock(&ali_drw_pmu_irqs_lock);
+		return;
+	}
+
+	list_del(&irq->irqs_node);
+	mutex_unlock(&ali_drw_pmu_irqs_lock);
+
+	WARN_ON(irq_set_affinity_hint(irq->irq_num, NULL));
+	cpuhp_state_remove_instance_nocalls(ali_drw_cpuhp_state_num,
+					    &irq->node);
+	kfree(irq);
+}
+
+static int ali_drw_pmu_event_init(struct perf_event *event)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event *sibling;
+	struct device *dev = drw_pmu->pmu.dev;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event)) {
+		dev_err(dev, "Sampling not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->attach_state & PERF_ATTACH_TASK) {
+		dev_err(dev, "Per-task counter cannot allocate!\n");
+		return -EOPNOTSUPP;
+	}
+
+	event->cpu = drw_pmu->cpu;
+	if (event->cpu < 0) {
+		dev_err(dev, "Per-task mode not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->group_leader != event &&
+	    !is_software_event(event->group_leader)) {
+		dev_err(dev, "driveway only allow one event!\n");
+		return -EINVAL;
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling != event && !is_software_event(sibling)) {
+			dev_err(dev, "driveway event not allowed!\n");
+			return -EINVAL;
+		}
+	}
+
+	/* reset all the pmu counters */
+	writel(ALI_DRW_PMU_CNT_RST, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+
+	hwc->idx = -1;
+
+	return 0;
+}
+
+static void ali_drw_pmu_start(struct perf_event *event, int flags)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+
+	event->hw.state = 0;
+
+	if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) {
+		writel(ALI_DRW_PMU_CNT_START,
+		       drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+		return;
+	}
+
+	ali_drw_pmu_event_set_period(event);
+	if (flags & PERF_EF_RELOAD) {
+		unsigned long prev_raw_count =
+		    local64_read(&event->hw.prev_count);
+		writel(prev_raw_count,
+		       drw_pmu->cfg_base + ALI_DRW_PMU_CNT_PRELOAD);
+	}
+
+	ali_drw_pmu_enable_counter(event);
+
+	writel(ALI_DRW_PMU_CNT_START, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+}
+
+static void ali_drw_pmu_stop(struct perf_event *event, int flags)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	if (GET_DRW_EVENTID(event) != ALI_DRW_PMU_CYCLE_EVT_ID)
+		ali_drw_pmu_disable_counter(event);
+
+	writel(ALI_DRW_PMU_CNT_STOP, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+
+	ali_drw_pmu_event_update(event);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int ali_drw_pmu_add(struct perf_event *event, int flags)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = -1;
+	int evtid;
+
+	evtid = GET_DRW_EVENTID(event);
+
+	if (evtid != ALI_DRW_PMU_CYCLE_EVT_ID) {
+		idx = ali_drw_get_counter_idx(event);
+		if (idx < 0)
+			return idx;
+		drw_pmu->events[idx] = event;
+		drw_pmu->evtids[idx] = evtid;
+	}
+	hwc->idx = idx;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		ali_drw_pmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void ali_drw_pmu_del(struct perf_event *event, int flags)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	ali_drw_pmu_stop(event, PERF_EF_UPDATE);
+
+	if (idx >= 0 && idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS) {
+		drw_pmu->events[idx] = NULL;
+		drw_pmu->evtids[idx] = 0;
+		clear_bit(idx, drw_pmu->used_mask);
+	}
+
+	perf_event_update_userpage(event);
+}
+
+static void ali_drw_pmu_read(struct perf_event *event)
+{
+	ali_drw_pmu_event_update(event);
+}
+
+static int ali_drw_pmu_probe(struct platform_device *pdev)
+{
+	struct ali_drw_pmu *drw_pmu;
+	struct resource *res;
+	char *name;
+	int ret;
+
+	drw_pmu = devm_kzalloc(&pdev->dev, sizeof(*drw_pmu), GFP_KERNEL);
+	if (!drw_pmu)
+		return -ENOMEM;
+
+	drw_pmu->dev = &pdev->dev;
+	platform_set_drvdata(pdev, drw_pmu);
+
+	drw_pmu->cfg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(drw_pmu->cfg_base))
+		return PTR_ERR(drw_pmu->cfg_base);
+
+	name = devm_kasprintf(drw_pmu->dev, GFP_KERNEL, "ali_drw_%llx",
+			      (u64) (res->start >> ALI_DRW_PMU_PA_SHIFT));
+	if (!name)
+		return -ENOMEM;
+
+	writel(ALI_DRW_PMU_CNT_RST, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+
+	/* enable the generation of interrupt by all common counters */
+	writel(ALI_DRW_PMCOM_CNT_OV_INTR_MASK,
+	       drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_ENABLE_CTL);
+
+	/* clearing interrupt status */
+	writel(0xffffff, drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR);
+
+	drw_pmu->cpu = smp_processor_id();
+
+	ret = ali_drw_pmu_init_irq(drw_pmu, pdev);
+	if (ret)
+		return ret;
+
+	drw_pmu->pmu = (struct pmu) {
+		.module		= THIS_MODULE,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= ali_drw_pmu_event_init,
+		.add		= ali_drw_pmu_add,
+		.del		= ali_drw_pmu_del,
+		.start		= ali_drw_pmu_start,
+		.stop		= ali_drw_pmu_stop,
+		.read		= ali_drw_pmu_read,
+		.attr_groups	= ali_drw_pmu_attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	ret = perf_pmu_register(&drw_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(drw_pmu->dev, "DRW Driveway PMU PMU register failed!\n");
+		ali_drw_pmu_uninit_irq(drw_pmu);
+	}
+
+	return ret;
+}
+
+static int ali_drw_pmu_remove(struct platform_device *pdev)
+{
+	struct ali_drw_pmu *drw_pmu = platform_get_drvdata(pdev);
+
+	/* disable the generation of interrupt by all common counters */
+	writel(ALI_DRW_PMCOM_CNT_OV_INTR_MASK,
+	       drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_DISABLE_CTL);
+
+	ali_drw_pmu_uninit_irq(drw_pmu);
+	perf_pmu_unregister(&drw_pmu->pmu);
+
+	return 0;
+}
+
+static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ali_drw_pmu_irq *irq;
+	struct ali_drw_pmu *drw_pmu;
+	unsigned int target;
+	int ret;
+	cpumask_t node_online_cpus;
+
+	irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node);
+	if (cpu != irq->cpu)
+		return 0;
+
+	ret = cpumask_and(&node_online_cpus,
+			  cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask);
+	if (ret)
+		target = cpumask_any_but(&node_online_cpus, cpu);
+	else
+		target = cpumask_any_but(cpu_online_mask, cpu);
+
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	/* We're only reading, but this isn't the place to be involving RCU */
+	mutex_lock(&ali_drw_pmu_irqs_lock);
+	list_for_each_entry(drw_pmu, &irq->pmus_node, pmus_node)
+		perf_pmu_migrate_context(&drw_pmu->pmu, irq->cpu, target);
+	mutex_unlock(&ali_drw_pmu_irqs_lock);
+
+	WARN_ON(irq_set_affinity_hint(irq->irq_num, cpumask_of(target)));
+	irq->cpu = target;
+
+	return 0;
+}
+
+/*
+ * Due to historical reasons, the HID used in the production environment is
+ * ARMHD700, so we leave ARMHD700 as Compatible ID.
+ */
+static const struct acpi_device_id ali_drw_acpi_match[] = {
+	{"BABA5000", 0},
+	{"ARMHD700", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, ali_drw_acpi_match);
+
+static struct platform_driver ali_drw_pmu_driver = {
+	.driver = {
+		   .name = "ali_drw_pmu",
+		   .acpi_match_table = ali_drw_acpi_match,
+		   },
+	.probe = ali_drw_pmu_probe,
+	.remove = ali_drw_pmu_remove,
+};
+
+static int __init ali_drw_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "ali_drw_pmu:online",
+				      NULL, ali_drw_pmu_offline_cpu);
+
+	if (ret < 0) {
+		pr_err("DRW Driveway PMU: setup hotplug failed, ret = %d\n",
+		       ret);
+		return ret;
+	}
+	ali_drw_cpuhp_state_num = ret;
+
+	ret = platform_driver_register(&ali_drw_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(ali_drw_cpuhp_state_num);
+
+	return ret;
+}
+
+static void __exit ali_drw_pmu_exit(void)
+{
+	platform_driver_unregister(&ali_drw_pmu_driver);
+	cpuhp_remove_multi_state(ali_drw_cpuhp_state_num);
+}
+
+module_init(ali_drw_pmu_init);
+module_exit(ali_drw_pmu_exit);
+
+MODULE_AUTHOR("Hongbo Yao <yaohongbo@linux.alibaba.com>");
+MODULE_AUTHOR("Neng Chen <nengchen@linux.alibaba.com>");
+MODULE_AUTHOR("Shuai Xue <xueshuai@linux.alibaba.com>");
+MODULE_DESCRIPTION("Alibaba DDR Sub-System Driveway PMU driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/amlogic/Kconfig b/drivers/perf/amlogic/Kconfig
new file mode 100644
index 0000000000..f68db01a7f
--- /dev/null
+++ b/drivers/perf/amlogic/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MESON_DDR_PMU
+	tristate "Amlogic DDR Bandwidth Performance Monitor"
+	depends on ARCH_MESON || COMPILE_TEST
+	help
+          Provides support for the DDR performance monitor
+          in Amlogic SoCs, which can give information about
+          memory throughput and other related events. It
+          supports multiple channels to monitor the memory
+          bandwidth simultaneously.
diff --git a/drivers/perf/amlogic/Makefile b/drivers/perf/amlogic/Makefile
new file mode 100644
index 0000000000..d3ab2ac535
--- /dev/null
+++ b/drivers/perf/amlogic/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_MESON_DDR_PMU) += meson_ddr_pmu_g12.o
+
+meson_ddr_pmu_g12-y	:= meson_ddr_pmu_core.o meson_g12_ddr_pmu.o
diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
new file mode 100644
index 0000000000..bbc7285fd9
--- /dev/null
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -0,0 +1,563 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Amlogic, Inc. All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/irqreturn.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include <soc/amlogic/meson_ddr_pmu.h>
+
+struct ddr_pmu {
+	struct pmu pmu;
+	struct dmc_info info;
+	struct dmc_counter counters;	/* save counters from hw */
+	bool pmu_enabled;
+	struct device *dev;
+	char *name;
+	struct hlist_node node;
+	enum cpuhp_state cpuhp_state;
+	int cpu;			/* for cpu hotplug */
+};
+
+#define DDR_PERF_DEV_NAME "meson_ddr_bw"
+#define MAX_AXI_PORTS_OF_CHANNEL	4	/* A DMC channel can monitor max 4 axi ports */
+
+#define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
+#define dmc_info_to_pmu(p)	container_of(p, struct ddr_pmu, info)
+
+static void dmc_pmu_enable(struct ddr_pmu *pmu)
+{
+	if (!pmu->pmu_enabled)
+		pmu->info.hw_info->enable(&pmu->info);
+
+	pmu->pmu_enabled = true;
+}
+
+static void dmc_pmu_disable(struct ddr_pmu *pmu)
+{
+	if (pmu->pmu_enabled)
+		pmu->info.hw_info->disable(&pmu->info);
+
+	pmu->pmu_enabled = false;
+}
+
+static void meson_ddr_set_axi_filter(struct perf_event *event, u8 axi_id)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	int chann;
+
+	if (event->attr.config > ALL_CHAN_COUNTER_ID &&
+	    event->attr.config < COUNTER_MAX_ID) {
+		chann = event->attr.config - CHAN1_COUNTER_ID;
+
+		pmu->info.hw_info->set_axi_filter(&pmu->info, axi_id, chann);
+	}
+}
+
+static void ddr_cnt_addition(struct dmc_counter *sum,
+			     struct dmc_counter *add1,
+			     struct dmc_counter *add2,
+			     int chann_nr)
+{
+	int i;
+	u64 cnt1, cnt2;
+
+	sum->all_cnt = add1->all_cnt + add2->all_cnt;
+	sum->all_req = add1->all_req + add2->all_req;
+	for (i = 0; i < chann_nr; i++) {
+		cnt1 = add1->channel_cnt[i];
+		cnt2 = add2->channel_cnt[i];
+
+		sum->channel_cnt[i] = cnt1 + cnt2;
+	}
+}
+
+static void meson_ddr_perf_event_update(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	u64 new_raw_count = 0;
+	struct dmc_counter dc = {0}, sum_dc = {0};
+	int idx;
+	int chann_nr = pmu->info.hw_info->chann_nr;
+
+	/* get the remain counters in register. */
+	pmu->info.hw_info->get_counters(&pmu->info, &dc);
+
+	ddr_cnt_addition(&sum_dc, &pmu->counters, &dc, chann_nr);
+
+	switch (event->attr.config) {
+	case ALL_CHAN_COUNTER_ID:
+		new_raw_count = sum_dc.all_cnt;
+		break;
+	case CHAN1_COUNTER_ID:
+	case CHAN2_COUNTER_ID:
+	case CHAN3_COUNTER_ID:
+	case CHAN4_COUNTER_ID:
+	case CHAN5_COUNTER_ID:
+	case CHAN6_COUNTER_ID:
+	case CHAN7_COUNTER_ID:
+	case CHAN8_COUNTER_ID:
+		idx = event->attr.config - CHAN1_COUNTER_ID;
+		new_raw_count = sum_dc.channel_cnt[idx];
+		break;
+	}
+
+	local64_set(&event->count, new_raw_count);
+}
+
+static int meson_ddr_perf_event_init(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	u64 config1 = event->attr.config1;
+	u64 config2 = event->attr.config2;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (event->cpu < 0)
+		return -EOPNOTSUPP;
+
+	/* check if the number of parameters is too much */
+	if (event->attr.config != ALL_CHAN_COUNTER_ID &&
+	    hweight64(config1) + hweight64(config2) > MAX_AXI_PORTS_OF_CHANNEL)
+		return -EOPNOTSUPP;
+
+	event->cpu = pmu->cpu;
+
+	return 0;
+}
+
+static void meson_ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+
+	memset(&pmu->counters, 0, sizeof(pmu->counters));
+	dmc_pmu_enable(pmu);
+}
+
+static int meson_ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	u64 config1 = event->attr.config1;
+	u64 config2 = event->attr.config2;
+	int i;
+
+	for_each_set_bit(i,
+			 (const unsigned long *)&config1,
+			 BITS_PER_TYPE(config1))
+		meson_ddr_set_axi_filter(event, i);
+
+	for_each_set_bit(i,
+			 (const unsigned long *)&config2,
+			 BITS_PER_TYPE(config2))
+		meson_ddr_set_axi_filter(event, i + 64);
+
+	if (flags & PERF_EF_START)
+		meson_ddr_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void meson_ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+
+	if (flags & PERF_EF_UPDATE)
+		meson_ddr_perf_event_update(event);
+
+	dmc_pmu_disable(pmu);
+}
+
+static void meson_ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	meson_ddr_perf_event_stop(event, PERF_EF_UPDATE);
+}
+
+static ssize_t meson_ddr_perf_cpumask_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute meson_ddr_perf_cpumask_attr =
+__ATTR(cpumask, 0444, meson_ddr_perf_cpumask_show, NULL);
+
+static struct attribute *meson_ddr_perf_cpumask_attrs[] = {
+	&meson_ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_cpumask_attr_group = {
+	.attrs = meson_ddr_perf_cpumask_attrs,
+};
+
+static ssize_t
+pmu_event_show(struct device *dev, struct device_attribute *attr,
+	       char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+static ssize_t
+event_show_unit(struct device *dev, struct device_attribute *attr,
+		char *page)
+{
+	return sysfs_emit(page, "MB\n");
+}
+
+static ssize_t
+event_show_scale(struct device *dev, struct device_attribute *attr,
+		 char *page)
+{
+	/* one count = 16byte = 1.52587890625e-05 MB */
+	return sysfs_emit(page, "1.52587890625e-05\n");
+}
+
+#define AML_DDR_PMU_EVENT_ATTR(_name, _id)				\
+{									\
+	.attr = __ATTR(_name, 0444, pmu_event_show, NULL),		\
+	.id = _id,							\
+}
+
+#define AML_DDR_PMU_EVENT_UNIT_ATTR(_name)				\
+	__ATTR(_name.unit, 0444, event_show_unit, NULL)
+
+#define AML_DDR_PMU_EVENT_SCALE_ATTR(_name)				\
+	__ATTR(_name.scale, 0444, event_show_scale, NULL)
+
+static struct device_attribute event_unit_attrs[] = {
+	AML_DDR_PMU_EVENT_UNIT_ATTR(total_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_1_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_2_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_3_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_4_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_5_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_6_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_7_rw_bytes),
+	AML_DDR_PMU_EVENT_UNIT_ATTR(chan_8_rw_bytes),
+};
+
+static struct device_attribute event_scale_attrs[] = {
+	AML_DDR_PMU_EVENT_SCALE_ATTR(total_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_1_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_2_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_3_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_4_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_5_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_6_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_7_rw_bytes),
+	AML_DDR_PMU_EVENT_SCALE_ATTR(chan_8_rw_bytes),
+};
+
+static struct perf_pmu_events_attr event_attrs[] = {
+	AML_DDR_PMU_EVENT_ATTR(total_rw_bytes, ALL_CHAN_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_1_rw_bytes, CHAN1_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_2_rw_bytes, CHAN2_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_3_rw_bytes, CHAN3_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_4_rw_bytes, CHAN4_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_5_rw_bytes, CHAN5_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_6_rw_bytes, CHAN6_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_7_rw_bytes, CHAN7_COUNTER_ID),
+	AML_DDR_PMU_EVENT_ATTR(chan_8_rw_bytes, CHAN8_COUNTER_ID),
+};
+
+/* three attrs are combined an event */
+static struct attribute *ddr_perf_events_attrs[COUNTER_MAX_ID * 3];
+
+static struct attribute_group ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = ddr_perf_events_attrs,
+};
+
+static umode_t meson_ddr_perf_format_attr_visible(struct kobject *kobj,
+						  struct attribute *attr,
+						  int n)
+{
+	struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+	const u64 *capability = ddr_pmu->info.hw_info->capability;
+	struct device_attribute *dev_attr;
+	int id;
+	char value[20]; // config1:xxx, 20 is enough
+
+	dev_attr = container_of(attr, struct device_attribute, attr);
+	dev_attr->show(NULL, NULL, value);
+
+	if (sscanf(value, "config1:%d", &id) == 1)
+		return capability[0] & (1ULL << id) ? attr->mode : 0;
+
+	if (sscanf(value, "config2:%d", &id) == 1)
+		return capability[1] & (1ULL << id) ? attr->mode : 0;
+
+	return attr->mode;
+}
+
+static struct attribute_group ddr_perf_format_attr_group = {
+	.name = "format",
+	.is_visible = meson_ddr_perf_format_attr_visible,
+};
+
+static ssize_t meson_ddr_perf_identifier_show(struct device *dev,
+					      struct device_attribute *attr,
+					      char *page)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return sysfs_emit(page, "%s\n", pmu->name);
+}
+
+static struct device_attribute meson_ddr_perf_identifier_attr =
+__ATTR(identifier, 0444, meson_ddr_perf_identifier_show, NULL);
+
+static struct attribute *meson_ddr_perf_identifier_attrs[] = {
+	&meson_ddr_perf_identifier_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_identifier_attr_group = {
+	.attrs = meson_ddr_perf_identifier_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&ddr_perf_events_attr_group,
+	&ddr_perf_format_attr_group,
+	&ddr_perf_cpumask_attr_group,
+	&ddr_perf_identifier_attr_group,
+	NULL,
+};
+
+static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
+{
+	struct dmc_info *info = dev_id;
+	struct ddr_pmu *pmu;
+	struct dmc_counter counters, *sum_cnter;
+	int i;
+
+	pmu = dmc_info_to_pmu(info);
+
+	if (info->hw_info->irq_handler(info, &counters) != 0)
+		goto out;
+
+	sum_cnter = &pmu->counters;
+	sum_cnter->all_cnt += counters.all_cnt;
+	sum_cnter->all_req += counters.all_req;
+
+	for (i = 0; i < pmu->info.hw_info->chann_nr; i++)
+		sum_cnter->channel_cnt[i] += counters.channel_cnt[i];
+
+	if (pmu->pmu_enabled)
+		/*
+		 * the timer interrupt only supprt
+		 * one shot mode, we have to re-enable
+		 * it in ISR to support continue mode.
+		 */
+		info->hw_info->enable(info);
+
+	dev_dbg(pmu->dev, "counts: %llu %llu %llu, %llu, %llu, %llu\t\t"
+			"sum: %llu %llu %llu, %llu, %llu, %llu\n",
+			counters.all_req,
+			counters.all_cnt,
+			counters.channel_cnt[0],
+			counters.channel_cnt[1],
+			counters.channel_cnt[2],
+			counters.channel_cnt[3],
+
+			pmu->counters.all_req,
+			pmu->counters.all_cnt,
+			pmu->counters.channel_cnt[0],
+			pmu->counters.channel_cnt[1],
+			pmu->counters.channel_cnt[2],
+			pmu->counters.channel_cnt[3]);
+out:
+	return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+	int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+
+	WARN_ON(irq_set_affinity(pmu->info.irq_num, cpumask_of(pmu->cpu)));
+
+	return 0;
+}
+
+static void fill_event_attr(struct ddr_pmu *pmu)
+{
+	int i, j, k;
+	struct attribute **dst = ddr_perf_events_attrs;
+
+	j = 0;
+	k = 0;
+
+	/* fill ALL_CHAN_COUNTER_ID event */
+	dst[j++] = &event_attrs[k].attr.attr;
+	dst[j++] = &event_unit_attrs[k].attr;
+	dst[j++] = &event_scale_attrs[k].attr;
+
+	k++;
+
+	/* fill each channel event */
+	for (i = 0; i < pmu->info.hw_info->chann_nr; i++, k++) {
+		dst[j++] = &event_attrs[k].attr.attr;
+		dst[j++] = &event_unit_attrs[k].attr;
+		dst[j++] = &event_scale_attrs[k].attr;
+	}
+
+	dst[j] = NULL; /* mark end */
+}
+
+static void fmt_attr_fill(struct attribute **fmt_attr)
+{
+	ddr_perf_format_attr_group.attrs = fmt_attr;
+}
+
+static int ddr_pmu_parse_dt(struct platform_device *pdev,
+			    struct dmc_info *info)
+{
+	void __iomem *base;
+	int i, ret;
+
+	info->hw_info = of_device_get_match_data(&pdev->dev);
+
+	for (i = 0; i < info->hw_info->dmc_nr; i++) {
+		/* resource 0 for ddr register base */
+		base = devm_platform_ioremap_resource(pdev, i);
+		if (IS_ERR(base))
+			return PTR_ERR(base);
+
+		info->ddr_reg[i] = base;
+	}
+
+	/* resource i for pll register base */
+	base = devm_platform_ioremap_resource(pdev, i);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	info->pll_reg = base;
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		return ret;
+
+	info->irq_num = ret;
+
+	ret = devm_request_irq(&pdev->dev, info->irq_num, dmc_irq_handler,
+			       IRQF_NOBALANCING, dev_name(&pdev->dev),
+			       (void *)info);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int meson_ddr_pmu_create(struct platform_device *pdev)
+{
+	int ret;
+	char *name;
+	struct ddr_pmu *pmu;
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(struct ddr_pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	*pmu = (struct ddr_pmu) {
+		.pmu = {
+			.module		= THIS_MODULE,
+			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr	= perf_invalid_context,
+			.attr_groups	= attr_groups,
+			.event_init	= meson_ddr_perf_event_init,
+			.add		= meson_ddr_perf_event_add,
+			.del		= meson_ddr_perf_event_del,
+			.start		= meson_ddr_perf_event_start,
+			.stop		= meson_ddr_perf_event_stop,
+			.read		= meson_ddr_perf_event_update,
+		},
+	};
+
+	ret = ddr_pmu_parse_dt(pdev, &pmu->info);
+	if (ret < 0)
+		return ret;
+
+	fmt_attr_fill(pmu->info.hw_info->fmt_attr);
+
+	pmu->cpu = smp_processor_id();
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, name, NULL,
+				      ddr_perf_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	pmu->cpuhp_state = ret;
+
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	if (ret)
+		goto cpuhp_instance_err;
+
+	fill_event_attr(pmu);
+
+	ret = perf_pmu_register(&pmu->pmu, name, -1);
+	if (ret)
+		goto pmu_register_err;
+
+	pmu->name = name;
+	pmu->dev = &pdev->dev;
+	pmu->pmu_enabled = false;
+
+	platform_set_drvdata(pdev, pmu);
+
+	return 0;
+
+pmu_register_err:
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+cpuhp_instance_err:
+	cpuhp_remove_state(pmu->cpuhp_state);
+
+	return ret;
+}
+
+int meson_ddr_pmu_remove(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	cpuhp_remove_state(pmu->cpuhp_state);
+
+	return 0;
+}
diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
new file mode 100644
index 0000000000..8b643888d5
--- /dev/null
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Amlogic, Inc. All rights reserved.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+
+#include <soc/amlogic/meson_ddr_pmu.h>
+
+#define PORT_MAJOR		32
+#define DEFAULT_XTAL_FREQ	24000000UL
+
+#define DMC_QOS_IRQ		BIT(30)
+
+/* DMC bandwidth monitor register address offset */
+#define DMC_MON_G12_CTRL0		(0x0  << 2)
+#define DMC_MON_G12_CTRL1		(0x1  << 2)
+#define DMC_MON_G12_CTRL2		(0x2  << 2)
+#define DMC_MON_G12_CTRL3		(0x3  << 2)
+#define DMC_MON_G12_CTRL4		(0x4  << 2)
+#define DMC_MON_G12_CTRL5		(0x5  << 2)
+#define DMC_MON_G12_CTRL6		(0x6  << 2)
+#define DMC_MON_G12_CTRL7		(0x7  << 2)
+#define DMC_MON_G12_CTRL8		(0x8  << 2)
+
+#define DMC_MON_G12_ALL_REQ_CNT		(0x9  << 2)
+#define DMC_MON_G12_ALL_GRANT_CNT	(0xa  << 2)
+#define DMC_MON_G12_ONE_GRANT_CNT	(0xb  << 2)
+#define DMC_MON_G12_SEC_GRANT_CNT	(0xc  << 2)
+#define DMC_MON_G12_THD_GRANT_CNT	(0xd  << 2)
+#define DMC_MON_G12_FOR_GRANT_CNT	(0xe  << 2)
+#define DMC_MON_G12_TIMER		(0xf  << 2)
+
+/* Each bit represent a axi line */
+PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(arm, "config1:0");
+PMU_FORMAT_ATTR(gpu, "config1:1");
+PMU_FORMAT_ATTR(pcie, "config1:2");
+PMU_FORMAT_ATTR(hdcp, "config1:3");
+PMU_FORMAT_ATTR(hevc_front, "config1:4");
+PMU_FORMAT_ATTR(usb3_0, "config1:6");
+PMU_FORMAT_ATTR(device, "config1:7");
+PMU_FORMAT_ATTR(hevc_back, "config1:8");
+PMU_FORMAT_ATTR(h265enc, "config1:9");
+PMU_FORMAT_ATTR(vpu_read1, "config1:16");
+PMU_FORMAT_ATTR(vpu_read2, "config1:17");
+PMU_FORMAT_ATTR(vpu_read3, "config1:18");
+PMU_FORMAT_ATTR(vpu_write1, "config1:19");
+PMU_FORMAT_ATTR(vpu_write2, "config1:20");
+PMU_FORMAT_ATTR(vdec, "config1:21");
+PMU_FORMAT_ATTR(hcodec, "config1:22");
+PMU_FORMAT_ATTR(ge2d, "config1:23");
+
+PMU_FORMAT_ATTR(spicc1, "config1:32");
+PMU_FORMAT_ATTR(usb0, "config1:33");
+PMU_FORMAT_ATTR(dma, "config1:34");
+PMU_FORMAT_ATTR(arb0, "config1:35");
+PMU_FORMAT_ATTR(sd_emmc_b, "config1:36");
+PMU_FORMAT_ATTR(usb1, "config1:37");
+PMU_FORMAT_ATTR(audio, "config1:38");
+PMU_FORMAT_ATTR(aififo, "config1:39");
+PMU_FORMAT_ATTR(parser, "config1:41");
+PMU_FORMAT_ATTR(ao_cpu, "config1:42");
+PMU_FORMAT_ATTR(sd_emmc_c, "config1:43");
+PMU_FORMAT_ATTR(spicc2, "config1:44");
+PMU_FORMAT_ATTR(ethernet, "config1:45");
+PMU_FORMAT_ATTR(sana, "config1:46");
+
+/* for sm1 and g12b */
+PMU_FORMAT_ATTR(nna, "config1:10");
+
+/* for g12b only */
+PMU_FORMAT_ATTR(gdc, "config1:11");
+PMU_FORMAT_ATTR(mipi_isp, "config1:12");
+PMU_FORMAT_ATTR(arm1, "config1:13");
+PMU_FORMAT_ATTR(sd_emmc_a, "config1:40");
+
+static struct attribute *g12_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_arm.attr,
+	&format_attr_gpu.attr,
+	&format_attr_nna.attr,
+	&format_attr_gdc.attr,
+	&format_attr_arm1.attr,
+	&format_attr_mipi_isp.attr,
+	&format_attr_sd_emmc_a.attr,
+	&format_attr_pcie.attr,
+	&format_attr_hdcp.attr,
+	&format_attr_hevc_front.attr,
+	&format_attr_usb3_0.attr,
+	&format_attr_device.attr,
+	&format_attr_hevc_back.attr,
+	&format_attr_h265enc.attr,
+	&format_attr_vpu_read1.attr,
+	&format_attr_vpu_read2.attr,
+	&format_attr_vpu_read3.attr,
+	&format_attr_vpu_write1.attr,
+	&format_attr_vpu_write2.attr,
+	&format_attr_vdec.attr,
+	&format_attr_hcodec.attr,
+	&format_attr_ge2d.attr,
+	&format_attr_spicc1.attr,
+	&format_attr_usb0.attr,
+	&format_attr_dma.attr,
+	&format_attr_arb0.attr,
+	&format_attr_sd_emmc_b.attr,
+	&format_attr_usb1.attr,
+	&format_attr_audio.attr,
+	&format_attr_aififo.attr,
+	&format_attr_parser.attr,
+	&format_attr_ao_cpu.attr,
+	&format_attr_sd_emmc_c.attr,
+	&format_attr_spicc2.attr,
+	&format_attr_ethernet.attr,
+	&format_attr_sana.attr,
+	NULL,
+};
+
+/* calculate ddr clock */
+static unsigned long dmc_g12_get_freq_quick(struct dmc_info *info)
+{
+	unsigned int val;
+	unsigned int n, m, od1;
+	unsigned int od_div = 0xfff;
+	unsigned long freq = 0;
+
+	val = readl(info->pll_reg);
+	val = val & 0xfffff;
+	switch ((val >> 16) & 7) {
+	case 0:
+		od_div = 2;
+		break;
+
+	case 1:
+		od_div = 3;
+		break;
+
+	case 2:
+		od_div = 4;
+		break;
+
+	case 3:
+		od_div = 6;
+		break;
+
+	case 4:
+		od_div = 8;
+		break;
+
+	default:
+		break;
+	}
+
+	m = val & 0x1ff;
+	n = ((val >> 10) & 0x1f);
+	od1 = (((val >> 19) & 0x1)) == 1 ? 2 : 1;
+	freq = DEFAULT_XTAL_FREQ / 1000;        /* avoid overflow */
+	if (n)
+		freq = ((((freq * m) / n) >> od1) / od_div) * 1000;
+
+	return freq;
+}
+
+#ifdef DEBUG
+static void g12_dump_reg(struct dmc_info *db)
+{
+	int s = 0, i;
+	unsigned int r;
+
+	for (i = 0; i < 9; i++) {
+		r  = readl(db->ddr_reg[0] + (DMC_MON_G12_CTRL0 + (i << 2)));
+		pr_notice("DMC_MON_CTRL%d:        %08x\n", i, r);
+	}
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
+	pr_notice("DMC_MON_ALL_REQ_CNT:  %08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
+	pr_notice("DMC_MON_ALL_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_ONE_GRANT_CNT);
+	pr_notice("DMC_MON_ONE_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_SEC_GRANT_CNT);
+	pr_notice("DMC_MON_SEC_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_THD_GRANT_CNT);
+	pr_notice("DMC_MON_THD_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_FOR_GRANT_CNT);
+	pr_notice("DMC_MON_FOR_GRANT_CNT:%08x\n", r);
+	r  = readl(db->ddr_reg[0] + DMC_MON_G12_TIMER);
+	pr_notice("DMC_MON_TIMER:        %08x\n", r);
+}
+#endif
+
+static void dmc_g12_counter_enable(struct dmc_info *info)
+{
+	unsigned int val;
+	unsigned long clock_count = dmc_g12_get_freq_quick(info) / 10; /* 100ms */
+
+	writel(clock_count, info->ddr_reg[0] + DMC_MON_G12_TIMER);
+
+	val = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+
+	/* enable all channel */
+	val =  BIT(31) |	/* enable bit */
+	       BIT(20) |	/* use timer  */
+	       0x0f;		/* 4 channels */
+
+	writel(val, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+
+#ifdef DEBUG
+	g12_dump_reg(info);
+#endif
+}
+
+static void dmc_g12_config_fiter(struct dmc_info *info,
+				 int port, int channel)
+{
+	u32 val;
+	u32 rp[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL1, DMC_MON_G12_CTRL3,
+					DMC_MON_G12_CTRL5, DMC_MON_G12_CTRL7};
+	u32 rs[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL2, DMC_MON_G12_CTRL4,
+					DMC_MON_G12_CTRL6, DMC_MON_G12_CTRL8};
+	int subport = -1;
+
+	/* clear all port mask */
+	if (port < 0) {
+		writel(0, info->ddr_reg[0] + rp[channel]);
+		writel(0, info->ddr_reg[0] + rs[channel]);
+		return;
+	}
+
+	if (port >= PORT_MAJOR)
+		subport = port - PORT_MAJOR;
+
+	if (subport < 0) {
+		val = readl(info->ddr_reg[0] + rp[channel]);
+		val |=  (1 << port);
+		writel(val, info->ddr_reg[0] + rp[channel]);
+		val = 0xffff;
+		writel(val, info->ddr_reg[0] + rs[channel]);
+	} else {
+		val = BIT(23);		/* select device */
+		writel(val, info->ddr_reg[0] + rp[channel]);
+		val = readl(info->ddr_reg[0] + rs[channel]);
+		val |= (1 << subport);
+		writel(val, info->ddr_reg[0] + rs[channel]);
+	}
+}
+
+static void dmc_g12_set_axi_filter(struct dmc_info *info, int axi_id, int channel)
+{
+	if (channel > info->hw_info->chann_nr)
+		return;
+
+	dmc_g12_config_fiter(info, axi_id, channel);
+}
+
+static void dmc_g12_counter_disable(struct dmc_info *info)
+{
+	int i;
+
+	/* clear timer */
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_TIMER);
+
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_ONE_GRANT_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_SEC_GRANT_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_THD_GRANT_CNT);
+	writel(0, info->ddr_reg[0] + DMC_MON_G12_FOR_GRANT_CNT);
+
+	/* clear port channel mapping */
+	for (i = 0; i < info->hw_info->chann_nr; i++)
+		dmc_g12_config_fiter(info, -1, i);
+}
+
+static void dmc_g12_get_counters(struct dmc_info *info,
+				 struct dmc_counter *counter)
+{
+	int i;
+	unsigned int reg;
+
+	counter->all_cnt = readl(info->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
+	counter->all_req   = readl(info->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
+
+	for (i = 0; i < info->hw_info->chann_nr; i++) {
+		reg = DMC_MON_G12_ONE_GRANT_CNT + (i << 2);
+		counter->channel_cnt[i] = readl(info->ddr_reg[0] + reg);
+	}
+}
+
+static int dmc_g12_irq_handler(struct dmc_info *info,
+			       struct dmc_counter *counter)
+{
+	unsigned int val;
+	int ret = -EINVAL;
+
+	val = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+	if (val & DMC_QOS_IRQ) {
+		dmc_g12_get_counters(info, counter);
+		/* clear irq flags */
+		writel(val, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
+		ret = 0;
+	}
+	return ret;
+}
+
+static const struct dmc_hw_info g12a_dmc_info = {
+	.enable		= dmc_g12_counter_enable,
+	.disable	= dmc_g12_counter_disable,
+	.irq_handler	= dmc_g12_irq_handler,
+	.get_counters	= dmc_g12_get_counters,
+	.set_axi_filter	= dmc_g12_set_axi_filter,
+
+	.dmc_nr = 1,
+	.chann_nr = 4,
+	.capability = {0X7EFF00FF03DF, 0},
+	.fmt_attr = g12_pmu_format_attrs,
+};
+
+static const struct dmc_hw_info g12b_dmc_info = {
+	.enable		= dmc_g12_counter_enable,
+	.disable	= dmc_g12_counter_disable,
+	.irq_handler	= dmc_g12_irq_handler,
+	.get_counters	= dmc_g12_get_counters,
+	.set_axi_filter	= dmc_g12_set_axi_filter,
+
+	.dmc_nr = 1,
+	.chann_nr = 4,
+	.capability = {0X7FFF00FF3FDF, 0},
+	.fmt_attr = g12_pmu_format_attrs,
+};
+
+static const struct dmc_hw_info sm1_dmc_info = {
+	.enable		= dmc_g12_counter_enable,
+	.disable	= dmc_g12_counter_disable,
+	.irq_handler	= dmc_g12_irq_handler,
+	.get_counters	= dmc_g12_get_counters,
+	.set_axi_filter	= dmc_g12_set_axi_filter,
+
+	.dmc_nr = 1,
+	.chann_nr = 4,
+	.capability = {0X7EFF00FF07DF, 0},
+	.fmt_attr = g12_pmu_format_attrs,
+};
+
+static int g12_ddr_pmu_probe(struct platform_device *pdev)
+{
+	return meson_ddr_pmu_create(pdev);
+}
+
+static int g12_ddr_pmu_remove(struct platform_device *pdev)
+{
+	meson_ddr_pmu_remove(pdev);
+
+	return 0;
+}
+
+static const struct of_device_id meson_ddr_pmu_dt_match[] = {
+	{
+		.compatible = "amlogic,g12a-ddr-pmu",
+		.data = &g12a_dmc_info,
+	},
+	{
+		.compatible = "amlogic,g12b-ddr-pmu",
+		.data = &g12b_dmc_info,
+	},
+	{
+		.compatible = "amlogic,sm1-ddr-pmu",
+		.data = &sm1_dmc_info,
+	},
+	{}
+};
+
+static struct platform_driver g12_ddr_pmu_driver = {
+	.probe = g12_ddr_pmu_probe,
+	.remove = g12_ddr_pmu_remove,
+
+	.driver = {
+		.name = "meson-g12-ddr-pmu",
+		.of_match_table = meson_ddr_pmu_dt_match,
+	},
+};
+
+module_platform_driver(g12_ddr_pmu_driver);
+MODULE_AUTHOR("Jiucheng Xu");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Amlogic G12 series SoC DDR PMU");
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
new file mode 100644
index 0000000000..cd2de44b61
--- /dev/null
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CPU PMU driver for the Apple M1 and derivatives
+ *
+ * Copyright (C) 2021 Google LLC
+ *
+ * Author: Marc Zyngier <maz@kernel.org>
+ *
+ * Most of the information used in this driver was provided by the
+ * Asahi Linux project. The rest was experimentally discovered.
+ */
+
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+
+#include <asm/apple_m1_pmu.h>
+#include <asm/irq_regs.h>
+#include <asm/perf_event.h>
+
+#define M1_PMU_NR_COUNTERS		10
+
+#define M1_PMU_CFG_EVENT		GENMASK(7, 0)
+
+#define ANY_BUT_0_1			GENMASK(9, 2)
+#define ONLY_2_TO_7			GENMASK(7, 2)
+#define ONLY_2_4_6			(BIT(2) | BIT(4) | BIT(6))
+#define ONLY_5_6_7			(BIT(5) | BIT(6) | BIT(7))
+
+/*
+ * Description of the events we actually know about, as well as those with
+ * a specific counter affinity. Yes, this is a grand total of two known
+ * counters, and the rest is anybody's guess.
+ *
+ * Not all counters can count all events. Counters #0 and #1 are wired to
+ * count cycles and instructions respectively, and some events have
+ * bizarre mappings (every other counter, or even *one* counter). These
+ * restrictions equally apply to both P and E cores.
+ *
+ * It is worth noting that the PMUs attached to P and E cores are likely
+ * to be different because the underlying uarches are different. At the
+ * moment, we don't really need to distinguish between the two because we
+ * know next to nothing about the events themselves, and we already have
+ * per cpu-type PMU abstractions.
+ *
+ * If we eventually find out that the events are different across
+ * implementations, we'll have to introduce per cpu-type tables.
+ */
+enum m1_pmu_events {
+	M1_PMU_PERFCTR_UNKNOWN_01	= 0x01,
+	M1_PMU_PERFCTR_CPU_CYCLES	= 0x02,
+	M1_PMU_PERFCTR_INSTRUCTIONS	= 0x8c,
+	M1_PMU_PERFCTR_UNKNOWN_8d	= 0x8d,
+	M1_PMU_PERFCTR_UNKNOWN_8e	= 0x8e,
+	M1_PMU_PERFCTR_UNKNOWN_8f	= 0x8f,
+	M1_PMU_PERFCTR_UNKNOWN_90	= 0x90,
+	M1_PMU_PERFCTR_UNKNOWN_93	= 0x93,
+	M1_PMU_PERFCTR_UNKNOWN_94	= 0x94,
+	M1_PMU_PERFCTR_UNKNOWN_95	= 0x95,
+	M1_PMU_PERFCTR_UNKNOWN_96	= 0x96,
+	M1_PMU_PERFCTR_UNKNOWN_97	= 0x97,
+	M1_PMU_PERFCTR_UNKNOWN_98	= 0x98,
+	M1_PMU_PERFCTR_UNKNOWN_99	= 0x99,
+	M1_PMU_PERFCTR_UNKNOWN_9a	= 0x9a,
+	M1_PMU_PERFCTR_UNKNOWN_9b	= 0x9b,
+	M1_PMU_PERFCTR_UNKNOWN_9c	= 0x9c,
+	M1_PMU_PERFCTR_UNKNOWN_9f	= 0x9f,
+	M1_PMU_PERFCTR_UNKNOWN_bf	= 0xbf,
+	M1_PMU_PERFCTR_UNKNOWN_c0	= 0xc0,
+	M1_PMU_PERFCTR_UNKNOWN_c1	= 0xc1,
+	M1_PMU_PERFCTR_UNKNOWN_c4	= 0xc4,
+	M1_PMU_PERFCTR_UNKNOWN_c5	= 0xc5,
+	M1_PMU_PERFCTR_UNKNOWN_c6	= 0xc6,
+	M1_PMU_PERFCTR_UNKNOWN_c8	= 0xc8,
+	M1_PMU_PERFCTR_UNKNOWN_ca	= 0xca,
+	M1_PMU_PERFCTR_UNKNOWN_cb	= 0xcb,
+	M1_PMU_PERFCTR_UNKNOWN_f5	= 0xf5,
+	M1_PMU_PERFCTR_UNKNOWN_f6	= 0xf6,
+	M1_PMU_PERFCTR_UNKNOWN_f7	= 0xf7,
+	M1_PMU_PERFCTR_UNKNOWN_f8	= 0xf8,
+	M1_PMU_PERFCTR_UNKNOWN_fd	= 0xfd,
+	M1_PMU_PERFCTR_LAST		= M1_PMU_CFG_EVENT,
+
+	/*
+	 * From this point onwards, these are not actual HW events,
+	 * but attributes that get stored in hw->config_base.
+	 */
+	M1_PMU_CFG_COUNT_USER		= BIT(8),
+	M1_PMU_CFG_COUNT_KERNEL		= BIT(9),
+};
+
+/*
+ * Per-event affinity table. Most events can be installed on counter
+ * 2-9, but there are a number of exceptions. Note that this table
+ * has been created experimentally, and I wouldn't be surprised if more
+ * counters had strange affinities.
+ */
+static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = {
+	[0 ... M1_PMU_PERFCTR_LAST]	= ANY_BUT_0_1,
+	[M1_PMU_PERFCTR_UNKNOWN_01]	= BIT(7),
+	[M1_PMU_PERFCTR_CPU_CYCLES]	= ANY_BUT_0_1 | BIT(0),
+	[M1_PMU_PERFCTR_INSTRUCTIONS]	= BIT(7) | BIT(1),
+	[M1_PMU_PERFCTR_UNKNOWN_8d]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_8e]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_8f]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_90]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_93]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_94]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_95]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_96]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_97]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_98]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_99]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9a]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_9b]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9c]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_9f]	= BIT(7),
+	[M1_PMU_PERFCTR_UNKNOWN_bf]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c0]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c1]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c4]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c5]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c6]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_c8]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_ca]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_cb]	= ONLY_5_6_7,
+	[M1_PMU_PERFCTR_UNKNOWN_f5]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f6]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f7]	= ONLY_2_4_6,
+	[M1_PMU_PERFCTR_UNKNOWN_f8]	= ONLY_2_TO_7,
+	[M1_PMU_PERFCTR_UNKNOWN_fd]	= ONLY_2_4_6,
+};
+
+static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]	= M1_PMU_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]	= M1_PMU_PERFCTR_INSTRUCTIONS,
+	/* No idea about the rest yet */
+};
+
+/* sysfs definitions */
+static ssize_t m1_pmu_events_sysfs_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+}
+
+#define M1_PMU_EVENT_ATTR(name, config)					\
+	PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config)
+
+static struct attribute *m1_pmu_event_attrs[] = {
+	M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES),
+	M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS),
+	NULL,
+};
+
+static const struct attribute_group m1_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = m1_pmu_event_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *m1_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static const struct attribute_group m1_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = m1_pmu_format_attrs,
+};
+
+/* Low level accessors. No synchronisation. */
+#define PMU_READ_COUNTER(_idx)						\
+	case _idx:	return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
+
+#define PMU_WRITE_COUNTER(_val, _idx)					\
+	case _idx:							\
+		write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1);	\
+		return
+
+static u64 m1_pmu_read_hw_counter(unsigned int index)
+{
+	switch (index) {
+		PMU_READ_COUNTER(0);
+		PMU_READ_COUNTER(1);
+		PMU_READ_COUNTER(2);
+		PMU_READ_COUNTER(3);
+		PMU_READ_COUNTER(4);
+		PMU_READ_COUNTER(5);
+		PMU_READ_COUNTER(6);
+		PMU_READ_COUNTER(7);
+		PMU_READ_COUNTER(8);
+		PMU_READ_COUNTER(9);
+	}
+
+	BUG();
+}
+
+static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
+{
+	switch (index) {
+		PMU_WRITE_COUNTER(val, 0);
+		PMU_WRITE_COUNTER(val, 1);
+		PMU_WRITE_COUNTER(val, 2);
+		PMU_WRITE_COUNTER(val, 3);
+		PMU_WRITE_COUNTER(val, 4);
+		PMU_WRITE_COUNTER(val, 5);
+		PMU_WRITE_COUNTER(val, 6);
+		PMU_WRITE_COUNTER(val, 7);
+		PMU_WRITE_COUNTER(val, 8);
+		PMU_WRITE_COUNTER(val, 9);
+	}
+
+	BUG();
+}
+
+#define get_bit_offset(index, mask)	(__ffs(mask) + (index))
+
+static void __m1_pmu_enable_counter(unsigned int index, bool en)
+{
+	u64 val, bit;
+
+	switch (index) {
+	case 0 ... 7:
+		bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
+		break;
+	case 8 ... 9:
+		bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+	if (en)
+		val |= bit;
+	else
+		val &= ~bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void m1_pmu_enable_counter(unsigned int index)
+{
+	__m1_pmu_enable_counter(index, true);
+}
+
+static void m1_pmu_disable_counter(unsigned int index)
+{
+	__m1_pmu_enable_counter(index, false);
+}
+
+static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
+{
+	u64 val, bit;
+
+	switch (index) {
+	case 0 ... 7:
+		bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
+		break;
+	case 8 ... 9:
+		bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+
+	if (en)
+		val |= bit;
+	else
+		val &= ~bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+}
+
+static void m1_pmu_enable_counter_interrupt(unsigned int index)
+{
+	__m1_pmu_enable_counter_interrupt(index, true);
+}
+
+static void m1_pmu_disable_counter_interrupt(unsigned int index)
+{
+	__m1_pmu_enable_counter_interrupt(index, false);
+}
+
+static void m1_pmu_configure_counter(unsigned int index, u8 event,
+				     bool user, bool kernel)
+{
+	u64 val, user_bit, kernel_bit;
+	int shift;
+
+	switch (index) {
+	case 0 ... 7:
+		user_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7));
+		kernel_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7));
+		break;
+	case 8 ... 9:
+		user_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9));
+		kernel_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9));
+		break;
+	default:
+		BUG();
+	}
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
+
+	if (user)
+		val |= user_bit;
+	else
+		val &= ~user_bit;
+
+	if (kernel)
+		val |= kernel_bit;
+	else
+		val &= ~kernel_bit;
+
+	write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1);
+
+	/*
+	 * Counters 0 and 1 have fixed events. For anything else,
+	 * place the event at the expected location in the relevant
+	 * register (PMESR0 holds the event configuration for counters
+	 * 2-5, resp. PMESR1 for counters 6-9).
+	 */
+	switch (index) {
+	case 0 ... 1:
+		break;
+	case 2 ... 5:
+		shift = (index - 2) * 8;
+		val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
+		val &= ~((u64)0xff << shift);
+		val |= (u64)event << shift;
+		write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1);
+		break;
+	case 6 ... 9:
+		shift = (index - 6) * 8;
+		val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
+		val &= ~((u64)0xff << shift);
+		val |= (u64)event << shift;
+		write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1);
+		break;
+	}
+}
+
+/* arm_pmu backend */
+static void m1_pmu_enable_event(struct perf_event *event)
+{
+	bool user, kernel;
+	u8 evt;
+
+	evt = event->hw.config_base & M1_PMU_CFG_EVENT;
+	user = event->hw.config_base & M1_PMU_CFG_COUNT_USER;
+	kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL;
+
+	m1_pmu_disable_counter_interrupt(event->hw.idx);
+	m1_pmu_disable_counter(event->hw.idx);
+	isb();
+
+	m1_pmu_configure_counter(event->hw.idx, evt, user, kernel);
+	m1_pmu_enable_counter(event->hw.idx);
+	m1_pmu_enable_counter_interrupt(event->hw.idx);
+	isb();
+}
+
+static void m1_pmu_disable_event(struct perf_event *event)
+{
+	m1_pmu_disable_counter_interrupt(event->hw.idx);
+	m1_pmu_disable_counter(event->hw.idx);
+	isb();
+}
+
+static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	u64 overflow, state;
+	int idx;
+
+	overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1);
+	if (!overflow) {
+		/* Spurious interrupt? */
+		state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+		state &= ~PMCR0_IACT;
+		write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1);
+		isb();
+		return IRQ_NONE;
+	}
+
+	cpu_pmu->stop(cpu_pmu);
+
+	regs = get_irq_regs();
+
+	for (idx = 0; idx < cpu_pmu->num_events; idx++) {
+		struct perf_event *event = cpuc->events[idx];
+		struct perf_sample_data data;
+
+		if (!event)
+			continue;
+
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, event->hw.last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			m1_pmu_disable_event(event);
+	}
+
+	cpu_pmu->start(cpu_pmu);
+
+	return IRQ_HANDLED;
+}
+
+static u64 m1_pmu_read_counter(struct perf_event *event)
+{
+	return m1_pmu_read_hw_counter(event->hw.idx);
+}
+
+static void m1_pmu_write_counter(struct perf_event *event, u64 value)
+{
+	m1_pmu_write_hw_counter(value, event->hw.idx);
+	isb();
+}
+
+static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	unsigned long evtype = event->hw.config_base & M1_PMU_CFG_EVENT;
+	unsigned long affinity = m1_pmu_event_affinity[evtype];
+	int idx;
+
+	/*
+	 * Place the event on the first free counter that can count
+	 * this event.
+	 *
+	 * We could do a better job if we had a view of all the events
+	 * counting on the PMU at any given time, and by placing the
+	 * most constraining events first.
+	 */
+	for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+
+	return -EAGAIN;
+}
+
+static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void __m1_pmu_set_mode(u8 mode)
+{
+	u64 val;
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+	val &= ~(PMCR0_IMODE | PMCR0_IACT);
+	val |= FIELD_PREP(PMCR0_IMODE, mode);
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+	isb();
+}
+
+static void m1_pmu_start(struct arm_pmu *cpu_pmu)
+{
+	__m1_pmu_set_mode(PMCR0_IMODE_FIQ);
+}
+
+static void m1_pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	__m1_pmu_set_mode(PMCR0_IMODE_OFF);
+}
+
+static int m1_pmu_map_event(struct perf_event *event)
+{
+	/*
+	 * Although the counters are 48bit wide, bit 47 is what
+	 * triggers the overflow interrupt. Advertise the counters
+	 * being 47bit wide to mimick the behaviour of the ARM PMU.
+	 */
+	event->hw.flags |= ARMPMU_EVT_47BIT;
+	return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
+}
+
+static int m2_pmu_map_event(struct perf_event *event)
+{
+	/*
+	 * Same deal as the above, except that M2 has 64bit counters.
+	 * Which, as far as we're concerned, actually means 63 bits.
+	 * Yes, this is getting awkward.
+	 */
+	event->hw.flags |= ARMPMU_EVT_63BIT;
+	return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
+}
+
+static void m1_pmu_reset(void *info)
+{
+	int i;
+
+	__m1_pmu_set_mode(PMCR0_IMODE_OFF);
+
+	for (i = 0; i < M1_PMU_NR_COUNTERS; i++) {
+		m1_pmu_disable_counter(i);
+		m1_pmu_disable_counter_interrupt(i);
+		m1_pmu_write_hw_counter(0, i);
+	}
+
+	isb();
+}
+
+static int m1_pmu_set_event_filter(struct hw_perf_event *event,
+				   struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (!attr->exclude_guest)
+		return -EINVAL;
+	if (!attr->exclude_kernel)
+		config_base |= M1_PMU_CFG_COUNT_KERNEL;
+	if (!attr->exclude_user)
+		config_base |= M1_PMU_CFG_COUNT_USER;
+
+	event->config_base = config_base;
+
+	return 0;
+}
+
+static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags)
+{
+	cpu_pmu->handle_irq	  = m1_pmu_handle_irq;
+	cpu_pmu->enable		  = m1_pmu_enable_event;
+	cpu_pmu->disable	  = m1_pmu_disable_event;
+	cpu_pmu->read_counter	  = m1_pmu_read_counter;
+	cpu_pmu->write_counter	  = m1_pmu_write_counter;
+	cpu_pmu->get_event_idx	  = m1_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx  = m1_pmu_clear_event_idx;
+	cpu_pmu->start		  = m1_pmu_start;
+	cpu_pmu->stop		  = m1_pmu_stop;
+
+	if (flags & ARMPMU_EVT_47BIT)
+		cpu_pmu->map_event = m1_pmu_map_event;
+	else if (flags & ARMPMU_EVT_63BIT)
+		cpu_pmu->map_event = m2_pmu_map_event;
+	else
+		return WARN_ON(-EINVAL);
+
+	cpu_pmu->reset		  = m1_pmu_reset;
+	cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
+
+	cpu_pmu->num_events	  = M1_PMU_NR_COUNTERS;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
+	return 0;
+}
+
+/* Device driver gunk */
+static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_icestorm_pmu";
+	return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT);
+}
+
+static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_firestorm_pmu";
+	return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT);
+}
+
+static int m2_pmu_avalanche_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_avalanche_pmu";
+	return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT);
+}
+
+static int m2_pmu_blizzard_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_blizzard_pmu";
+	return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT);
+}
+
+static const struct of_device_id m1_pmu_of_device_ids[] = {
+	{ .compatible = "apple,avalanche-pmu",	.data = m2_pmu_avalanche_init, },
+	{ .compatible = "apple,blizzard-pmu",	.data = m2_pmu_blizzard_init, },
+	{ .compatible = "apple,icestorm-pmu",	.data = m1_pmu_ice_init, },
+	{ .compatible = "apple,firestorm-pmu",	.data = m1_pmu_fire_init, },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids);
+
+static int m1_pmu_device_probe(struct platform_device *pdev)
+{
+	return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver m1_pmu_driver = {
+	.driver		= {
+		.name			= "apple-m1-cpu-pmu",
+		.of_match_table		= m1_pmu_of_device_ids,
+		.suppress_bind_attrs	= true,
+	},
+	.probe		= m1_pmu_device_probe,
+};
+
+module_platform_driver(m1_pmu_driver);
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
new file mode 100644
index 0000000000..61de861eaf
--- /dev/null
+++ b/drivers/perf/arm-cci.c
@@ -0,0 +1,1724 @@
+// SPDX-License-Identifier: GPL-2.0
+// CCI Cache Coherent Interconnect PMU driver
+// Copyright (C) 2013-2018 Arm Ltd.
+// Author: Punit Agrawal <punit.agrawal@arm.com>, Suzuki Poulose <suzuki.poulose@arm.com>
+
+#include <linux/arm-cci.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#define DRIVER_NAME		"ARM-CCI PMU"
+
+#define CCI_PMCR		0x0100
+#define CCI_PID2		0x0fe8
+
+#define CCI_PMCR_CEN		0x00000001
+#define CCI_PMCR_NCNT_MASK	0x0000f800
+#define CCI_PMCR_NCNT_SHIFT	11
+
+#define CCI_PID2_REV_MASK	0xf0
+#define CCI_PID2_REV_SHIFT	4
+
+#define CCI_PMU_EVT_SEL		0x000
+#define CCI_PMU_CNTR		0x004
+#define CCI_PMU_CNTR_CTRL	0x008
+#define CCI_PMU_OVRFLW		0x00c
+
+#define CCI_PMU_OVRFLW_FLAG	1
+
+#define CCI_PMU_CNTR_SIZE(model)	((model)->cntr_size)
+#define CCI_PMU_CNTR_BASE(model, idx)	((idx) * CCI_PMU_CNTR_SIZE(model))
+#define CCI_PMU_CNTR_MASK		((1ULL << 32) - 1)
+#define CCI_PMU_CNTR_LAST(cci_pmu)	(cci_pmu->num_cntrs - 1)
+
+#define CCI_PMU_MAX_HW_CNTRS(model) \
+	((model)->num_hw_cntrs + (model)->fixed_hw_cntrs)
+
+/* Types of interfaces that can generate events */
+enum {
+	CCI_IF_SLAVE,
+	CCI_IF_MASTER,
+#ifdef CONFIG_ARM_CCI5xx_PMU
+	CCI_IF_GLOBAL,
+#endif
+	CCI_IF_MAX,
+};
+
+#define NUM_HW_CNTRS_CII_4XX	4
+#define NUM_HW_CNTRS_CII_5XX	8
+#define NUM_HW_CNTRS_MAX	NUM_HW_CNTRS_CII_5XX
+
+#define FIXED_HW_CNTRS_CII_4XX	1
+#define FIXED_HW_CNTRS_CII_5XX	0
+#define FIXED_HW_CNTRS_MAX	FIXED_HW_CNTRS_CII_4XX
+
+#define HW_CNTRS_MAX		(NUM_HW_CNTRS_MAX + FIXED_HW_CNTRS_MAX)
+
+struct event_range {
+	u32 min;
+	u32 max;
+};
+
+struct cci_pmu_hw_events {
+	struct perf_event **events;
+	unsigned long *used_mask;
+	raw_spinlock_t pmu_lock;
+};
+
+struct cci_pmu;
+/*
+ * struct cci_pmu_model:
+ * @fixed_hw_cntrs - Number of fixed event counters
+ * @num_hw_cntrs - Maximum number of programmable event counters
+ * @cntr_size - Size of an event counter mapping
+ */
+struct cci_pmu_model {
+	char *name;
+	u32 fixed_hw_cntrs;
+	u32 num_hw_cntrs;
+	u32 cntr_size;
+	struct attribute **format_attrs;
+	struct attribute **event_attrs;
+	struct event_range event_ranges[CCI_IF_MAX];
+	int (*validate_hw_event)(struct cci_pmu *, unsigned long);
+	int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long);
+	void (*write_counters)(struct cci_pmu *, unsigned long *);
+};
+
+static struct cci_pmu_model cci_pmu_models[];
+
+struct cci_pmu {
+	void __iomem *base;
+	void __iomem *ctrl_base;
+	struct pmu pmu;
+	int cpu;
+	int nr_irqs;
+	int *irqs;
+	unsigned long active_irqs;
+	const struct cci_pmu_model *model;
+	struct cci_pmu_hw_events hw_events;
+	struct platform_device *plat_device;
+	int num_cntrs;
+	atomic_t active_events;
+	struct mutex reserve_mutex;
+};
+
+#define to_cci_pmu(c)	(container_of(c, struct cci_pmu, pmu))
+
+static struct cci_pmu *g_cci_pmu;
+
+enum cci_models {
+#ifdef CONFIG_ARM_CCI400_PMU
+	CCI400_R0,
+	CCI400_R1,
+#endif
+#ifdef CONFIG_ARM_CCI5xx_PMU
+	CCI500_R0,
+	CCI550_R0,
+#endif
+	CCI_MODEL_MAX
+};
+
+static void pmu_write_counters(struct cci_pmu *cci_pmu,
+				 unsigned long *mask);
+static ssize_t __maybe_unused cci_pmu_format_show(struct device *dev,
+			struct device_attribute *attr, char *buf);
+static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev,
+			struct device_attribute *attr, char *buf);
+
+#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) 				\
+	&((struct dev_ext_attribute[]) {					\
+		{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config }	\
+	})[0].attr.attr
+
+#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config)
+#define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config)
+
+/* CCI400 PMU Specific definitions */
+
+#ifdef CONFIG_ARM_CCI400_PMU
+
+/* Port ids */
+#define CCI400_PORT_S0		0
+#define CCI400_PORT_S1		1
+#define CCI400_PORT_S2		2
+#define CCI400_PORT_S3		3
+#define CCI400_PORT_S4		4
+#define CCI400_PORT_M0		5
+#define CCI400_PORT_M1		6
+#define CCI400_PORT_M2		7
+
+#define CCI400_R1_PX		5
+
+/*
+ * Instead of an event id to monitor CCI cycles, a dedicated counter is
+ * provided. Use 0xff to represent CCI cycles and hope that no future revisions
+ * make use of this event in hardware.
+ */
+enum cci400_perf_events {
+	CCI400_PMU_CYCLES = 0xff
+};
+
+#define CCI400_PMU_CYCLE_CNTR_IDX	0
+#define CCI400_PMU_CNTR0_IDX		1
+
+/*
+ * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8
+ * ports and bits 4:0 are event codes. There are different event codes
+ * associated with each port type.
+ *
+ * Additionally, the range of events associated with the port types changed
+ * between Rev0 and Rev1.
+ *
+ * The constants below define the range of valid codes for each port type for
+ * the different revisions and are used to validate the event to be monitored.
+ */
+
+#define CCI400_PMU_EVENT_MASK		0xffUL
+#define CCI400_PMU_EVENT_SOURCE_SHIFT	5
+#define CCI400_PMU_EVENT_SOURCE_MASK	0x7
+#define CCI400_PMU_EVENT_CODE_SHIFT	0
+#define CCI400_PMU_EVENT_CODE_MASK	0x1f
+#define CCI400_PMU_EVENT_SOURCE(event) \
+	((event >> CCI400_PMU_EVENT_SOURCE_SHIFT) & \
+			CCI400_PMU_EVENT_SOURCE_MASK)
+#define CCI400_PMU_EVENT_CODE(event) \
+	((event >> CCI400_PMU_EVENT_CODE_SHIFT) & CCI400_PMU_EVENT_CODE_MASK)
+
+#define CCI400_R0_SLAVE_PORT_MIN_EV	0x00
+#define CCI400_R0_SLAVE_PORT_MAX_EV	0x13
+#define CCI400_R0_MASTER_PORT_MIN_EV	0x14
+#define CCI400_R0_MASTER_PORT_MAX_EV	0x1a
+
+#define CCI400_R1_SLAVE_PORT_MIN_EV	0x00
+#define CCI400_R1_SLAVE_PORT_MAX_EV	0x14
+#define CCI400_R1_MASTER_PORT_MIN_EV	0x00
+#define CCI400_R1_MASTER_PORT_MAX_EV	0x11
+
+#define CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci400_pmu_cycle_event_show, \
+					(unsigned long)_config)
+
+static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
+			struct device_attribute *attr, char *buf);
+
+static struct attribute *cci400_pmu_format_attrs[] = {
+	CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
+	CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"),
+	NULL
+};
+
+static struct attribute *cci400_r0_pmu_event_attrs[] = {
+	/* Slave events */
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13),
+	/* Master events */
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x14),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_addr_hazard, 0x15),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_id_hazard, 0x16),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_tt_full, 0x17),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x18),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x19),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A),
+	/* Special event for cycles counter */
+	CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
+	NULL
+};
+
+static struct attribute *cci400_r1_pmu_event_attrs[] = {
+	/* Slave events */
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_slave_id_hazard, 0x14),
+	/* Master events */
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_stall_cycle_addr_hazard, 0x1),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_master_id_hazard, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_hi_prio_rtq_full, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_wtq_full, 0x6),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_low_prio_rtq_full, 0x7),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_mid_prio_rtq_full, 0x8),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn0, 0x9),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn1, 0xA),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn2, 0xB),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn3, 0xC),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn0, 0xD),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn1, 0xE),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn2, 0xF),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn3, 0x10),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11),
+	/* Special event for cycles counter */
+	CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
+	NULL
+};
+
+static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+				struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "config=0x%lx\n", (unsigned long)eattr->var);
+}
+
+static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
+				struct cci_pmu_hw_events *hw,
+				unsigned long cci_event)
+{
+	int idx;
+
+	/* cycles event idx is fixed */
+	if (cci_event == CCI400_PMU_CYCLES) {
+		if (test_and_set_bit(CCI400_PMU_CYCLE_CNTR_IDX, hw->used_mask))
+			return -EAGAIN;
+
+		return CCI400_PMU_CYCLE_CNTR_IDX;
+	}
+
+	for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
+		if (!test_and_set_bit(idx, hw->used_mask))
+			return idx;
+
+	/* No counters available */
+	return -EAGAIN;
+}
+
+static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
+{
+	u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event);
+	u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event);
+	int if_type;
+
+	if (hw_event & ~CCI400_PMU_EVENT_MASK)
+		return -ENOENT;
+
+	if (hw_event == CCI400_PMU_CYCLES)
+		return hw_event;
+
+	switch (ev_source) {
+	case CCI400_PORT_S0:
+	case CCI400_PORT_S1:
+	case CCI400_PORT_S2:
+	case CCI400_PORT_S3:
+	case CCI400_PORT_S4:
+		/* Slave Interface */
+		if_type = CCI_IF_SLAVE;
+		break;
+	case CCI400_PORT_M0:
+	case CCI400_PORT_M1:
+	case CCI400_PORT_M2:
+		/* Master Interface */
+		if_type = CCI_IF_MASTER;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
+		ev_code <= cci_pmu->model->event_ranges[if_type].max)
+		return hw_event;
+
+	return -ENOENT;
+}
+
+static int probe_cci400_revision(struct cci_pmu *cci_pmu)
+{
+	int rev;
+	rev = readl_relaxed(cci_pmu->ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK;
+	rev >>= CCI_PID2_REV_SHIFT;
+
+	if (rev < CCI400_R1_PX)
+		return CCI400_R0;
+	else
+		return CCI400_R1;
+}
+
+static const struct cci_pmu_model *probe_cci_model(struct cci_pmu *cci_pmu)
+{
+	if (platform_has_secure_cci_access())
+		return &cci_pmu_models[probe_cci400_revision(cci_pmu)];
+	return NULL;
+}
+#else	/* !CONFIG_ARM_CCI400_PMU */
+static inline struct cci_pmu_model *probe_cci_model(struct cci_pmu *cci_pmu)
+{
+	return NULL;
+}
+#endif	/* CONFIG_ARM_CCI400_PMU */
+
+#ifdef CONFIG_ARM_CCI5xx_PMU
+
+/*
+ * CCI5xx PMU event id is an 9-bit value made of two parts.
+ *	 bits [8:5] - Source for the event
+ *	 bits [4:0] - Event code (specific to type of interface)
+ *
+ *
+ */
+
+/* Port ids */
+#define CCI5xx_PORT_S0			0x0
+#define CCI5xx_PORT_S1			0x1
+#define CCI5xx_PORT_S2			0x2
+#define CCI5xx_PORT_S3			0x3
+#define CCI5xx_PORT_S4			0x4
+#define CCI5xx_PORT_S5			0x5
+#define CCI5xx_PORT_S6			0x6
+
+#define CCI5xx_PORT_M0			0x8
+#define CCI5xx_PORT_M1			0x9
+#define CCI5xx_PORT_M2			0xa
+#define CCI5xx_PORT_M3			0xb
+#define CCI5xx_PORT_M4			0xc
+#define CCI5xx_PORT_M5			0xd
+#define CCI5xx_PORT_M6			0xe
+
+#define CCI5xx_PORT_GLOBAL		0xf
+
+#define CCI5xx_PMU_EVENT_MASK		0x1ffUL
+#define CCI5xx_PMU_EVENT_SOURCE_SHIFT	0x5
+#define CCI5xx_PMU_EVENT_SOURCE_MASK	0xf
+#define CCI5xx_PMU_EVENT_CODE_SHIFT	0x0
+#define CCI5xx_PMU_EVENT_CODE_MASK	0x1f
+
+#define CCI5xx_PMU_EVENT_SOURCE(event)	\
+	((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK)
+#define CCI5xx_PMU_EVENT_CODE(event)	\
+	((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK)
+
+#define CCI5xx_SLAVE_PORT_MIN_EV	0x00
+#define CCI5xx_SLAVE_PORT_MAX_EV	0x1f
+#define CCI5xx_MASTER_PORT_MIN_EV	0x00
+#define CCI5xx_MASTER_PORT_MAX_EV	0x06
+#define CCI5xx_GLOBAL_PORT_MIN_EV	0x00
+#define CCI5xx_GLOBAL_PORT_MAX_EV	0x0f
+
+
+#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \
+					(unsigned long) _config)
+
+static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
+				struct device_attribute *attr, char *buf);
+
+static struct attribute *cci5xx_pmu_format_attrs[] = {
+	CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
+	CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"),
+	NULL,
+};
+
+static struct attribute *cci5xx_pmu_event_attrs[] = {
+	/* Slave events */
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_nonshareable, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_non_alloc, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_alloc, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_invalidate, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maint, 0x6),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rval, 0x8),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rlast_snoop, 0x9),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_awalid, 0xA),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_dev, 0xB),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_non_shareable, 0xC),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wb, 0xD),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wlu, 0xE),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wunique, 0xF),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_evict, 0x10),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_wrevict, 0x11),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_beat, 0x12),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_acvalid, 0x13),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_read, 0x14),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_clean, 0x15),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_data_transfer_low, 0x16),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_arvalid, 0x17),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall, 0x18),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall, 0x19),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_stall, 0x1A),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_w_resp_stall, 0x1B),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_stall, 0x1C),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_s_data_stall, 0x1D),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rq_stall_ot_limit, 0x1E),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_stall_arbit, 0x1F),
+
+	/* Master events */
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_beat_any, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_beat_any, 0x1),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_stall, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_stall, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6),
+
+	/* Global events */
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
+	NULL
+};
+
+static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+					struct dev_ext_attribute, attr);
+	/* Global events have single fixed source code */
+	return sysfs_emit(buf, "event=0x%lx,source=0x%x\n",
+			  (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL);
+}
+
+/*
+ * CCI500 provides 8 independent event counters that can count
+ * any of the events available.
+ * CCI500 PMU event source ids
+ *	0x0-0x6 - Slave interfaces
+ *	0x8-0xD - Master interfaces
+ *	0xf     - Global Events
+ *	0x7,0xe - Reserved
+ */
+static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
+					unsigned long hw_event)
+{
+	u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
+	u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
+	int if_type;
+
+	if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
+		return -ENOENT;
+
+	switch (ev_source) {
+	case CCI5xx_PORT_S0:
+	case CCI5xx_PORT_S1:
+	case CCI5xx_PORT_S2:
+	case CCI5xx_PORT_S3:
+	case CCI5xx_PORT_S4:
+	case CCI5xx_PORT_S5:
+	case CCI5xx_PORT_S6:
+		if_type = CCI_IF_SLAVE;
+		break;
+	case CCI5xx_PORT_M0:
+	case CCI5xx_PORT_M1:
+	case CCI5xx_PORT_M2:
+	case CCI5xx_PORT_M3:
+	case CCI5xx_PORT_M4:
+	case CCI5xx_PORT_M5:
+		if_type = CCI_IF_MASTER;
+		break;
+	case CCI5xx_PORT_GLOBAL:
+		if_type = CCI_IF_GLOBAL;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
+		ev_code <= cci_pmu->model->event_ranges[if_type].max)
+		return hw_event;
+
+	return -ENOENT;
+}
+
+/*
+ * CCI550 provides 8 independent event counters that can count
+ * any of the events available.
+ * CCI550 PMU event source ids
+ *	0x0-0x6 - Slave interfaces
+ *	0x8-0xe - Master interfaces
+ *	0xf     - Global Events
+ *	0x7	- Reserved
+ */
+static int cci550_validate_hw_event(struct cci_pmu *cci_pmu,
+					unsigned long hw_event)
+{
+	u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
+	u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
+	int if_type;
+
+	if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
+		return -ENOENT;
+
+	switch (ev_source) {
+	case CCI5xx_PORT_S0:
+	case CCI5xx_PORT_S1:
+	case CCI5xx_PORT_S2:
+	case CCI5xx_PORT_S3:
+	case CCI5xx_PORT_S4:
+	case CCI5xx_PORT_S5:
+	case CCI5xx_PORT_S6:
+		if_type = CCI_IF_SLAVE;
+		break;
+	case CCI5xx_PORT_M0:
+	case CCI5xx_PORT_M1:
+	case CCI5xx_PORT_M2:
+	case CCI5xx_PORT_M3:
+	case CCI5xx_PORT_M4:
+	case CCI5xx_PORT_M5:
+	case CCI5xx_PORT_M6:
+		if_type = CCI_IF_MASTER;
+		break;
+	case CCI5xx_PORT_GLOBAL:
+		if_type = CCI_IF_GLOBAL;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
+		ev_code <= cci_pmu->model->event_ranges[if_type].max)
+		return hw_event;
+
+	return -ENOENT;
+}
+
+#endif	/* CONFIG_ARM_CCI5xx_PMU */
+
+/*
+ * Program the CCI PMU counters which have PERF_HES_ARCH set
+ * with the event period and mark them ready before we enable
+ * PMU.
+ */
+static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
+{
+	int i;
+	struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
+	DECLARE_BITMAP(mask, HW_CNTRS_MAX);
+
+	bitmap_zero(mask, HW_CNTRS_MAX);
+	for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
+		struct perf_event *event = cci_hw->events[i];
+
+		if (WARN_ON(!event))
+			continue;
+
+		/* Leave the events which are not counting */
+		if (event->hw.state & PERF_HES_STOPPED)
+			continue;
+		if (event->hw.state & PERF_HES_ARCH) {
+			__set_bit(i, mask);
+			event->hw.state &= ~PERF_HES_ARCH;
+		}
+	}
+
+	pmu_write_counters(cci_pmu, mask);
+}
+
+/* Should be called with cci_pmu->hw_events->pmu_lock held */
+static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu)
+{
+	u32 val;
+
+	/* Enable all the PMU counters. */
+	val = readl_relaxed(cci_pmu->ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
+	writel(val, cci_pmu->ctrl_base + CCI_PMCR);
+}
+
+/* Should be called with cci_pmu->hw_events->pmu_lock held */
+static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu)
+{
+	cci_pmu_sync_counters(cci_pmu);
+	__cci_pmu_enable_nosync(cci_pmu);
+}
+
+/* Should be called with cci_pmu->hw_events->pmu_lock held */
+static void __cci_pmu_disable(struct cci_pmu *cci_pmu)
+{
+	u32 val;
+
+	/* Disable all the PMU counters. */
+	val = readl_relaxed(cci_pmu->ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
+	writel(val, cci_pmu->ctrl_base + CCI_PMCR);
+}
+
+static ssize_t cci_pmu_format_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+				struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t cci_pmu_event_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+				struct dev_ext_attribute, attr);
+	/* source parameter is mandatory for normal PMU events */
+	return sysfs_emit(buf, "source=?,event=0x%lx\n",
+			  (unsigned long)eattr->var);
+}
+
+static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
+{
+	return 0 <= idx && idx <= CCI_PMU_CNTR_LAST(cci_pmu);
+}
+
+static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offset)
+{
+	return readl_relaxed(cci_pmu->base +
+			     CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
+}
+
+static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value,
+			       int idx, unsigned int offset)
+{
+	writel_relaxed(value, cci_pmu->base +
+		       CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
+}
+
+static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx)
+{
+	pmu_write_register(cci_pmu, 0, idx, CCI_PMU_CNTR_CTRL);
+}
+
+static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx)
+{
+	pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL);
+}
+
+static bool __maybe_unused
+pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx)
+{
+	return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0;
+}
+
+static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event)
+{
+	pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL);
+}
+
+/*
+ * For all counters on the CCI-PMU, disable any 'enabled' counters,
+ * saving the changed counters in the mask, so that we can restore
+ * it later using pmu_restore_counters. The mask is private to the
+ * caller. We cannot rely on the used_mask maintained by the CCI_PMU
+ * as it only tells us if the counter is assigned to perf_event or not.
+ * The state of the perf_event cannot be locked by the PMU layer, hence
+ * we check the individual counter status (which can be locked by
+ * cci_pm->hw_events->pmu_lock).
+ *
+ * @mask should be initialised to empty by the caller.
+ */
+static void __maybe_unused
+pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	int i;
+
+	for (i = 0; i < cci_pmu->num_cntrs; i++) {
+		if (pmu_counter_is_enabled(cci_pmu, i)) {
+			set_bit(i, mask);
+			pmu_disable_counter(cci_pmu, i);
+		}
+	}
+}
+
+/*
+ * Restore the status of the counters. Reversal of the pmu_save_counters().
+ * For each counter set in the mask, enable the counter back.
+ */
+static void __maybe_unused
+pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	int i;
+
+	for_each_set_bit(i, mask, cci_pmu->num_cntrs)
+		pmu_enable_counter(cci_pmu, i);
+}
+
+/*
+ * Returns the number of programmable counters actually implemented
+ * by the cci
+ */
+static u32 pmu_get_max_counters(struct cci_pmu *cci_pmu)
+{
+	return (readl_relaxed(cci_pmu->ctrl_base + CCI_PMCR) &
+		CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
+}
+
+static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	unsigned long cci_event = event->hw.config_base;
+	int idx;
+
+	if (cci_pmu->model->get_event_idx)
+		return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
+
+	/* Generic code to find an unused idx from the mask */
+	for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
+		if (!test_and_set_bit(idx, hw->used_mask))
+			return idx;
+
+	/* No counters available */
+	return -EAGAIN;
+}
+
+static int pmu_map_event(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+
+	if (event->attr.type < PERF_TYPE_MAX ||
+			!cci_pmu->model->validate_hw_event)
+		return -ENOENT;
+
+	return	cci_pmu->model->validate_hw_event(cci_pmu, event->attr.config);
+}
+
+static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
+{
+	int i;
+	struct platform_device *pmu_device = cci_pmu->plat_device;
+
+	if (unlikely(!pmu_device))
+		return -ENODEV;
+
+	if (cci_pmu->nr_irqs < 1) {
+		dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Register all available CCI PMU interrupts. In the interrupt handler
+	 * we iterate over the counters checking for interrupt source (the
+	 * overflowing counter) and clear it.
+	 *
+	 * This should allow handling of non-unique interrupt for the counters.
+	 */
+	for (i = 0; i < cci_pmu->nr_irqs; i++) {
+		int err = request_irq(cci_pmu->irqs[i], handler, IRQF_SHARED,
+				"arm-cci-pmu", cci_pmu);
+		if (err) {
+			dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
+				cci_pmu->irqs[i]);
+			return err;
+		}
+
+		set_bit(i, &cci_pmu->active_irqs);
+	}
+
+	return 0;
+}
+
+static void pmu_free_irq(struct cci_pmu *cci_pmu)
+{
+	int i;
+
+	for (i = 0; i < cci_pmu->nr_irqs; i++) {
+		if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
+			continue;
+
+		free_irq(cci_pmu->irqs[i], cci_pmu);
+	}
+}
+
+static u32 pmu_read_counter(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+	u32 value;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return 0;
+	}
+	value = pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR);
+
+	return value;
+}
+
+static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
+{
+	pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
+}
+
+static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	int i;
+	struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
+
+	for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
+		struct perf_event *event = cci_hw->events[i];
+
+		if (WARN_ON(!event))
+			continue;
+		pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
+	}
+}
+
+static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	if (cci_pmu->model->write_counters)
+		cci_pmu->model->write_counters(cci_pmu, mask);
+	else
+		__pmu_write_counters(cci_pmu, mask);
+}
+
+#ifdef CONFIG_ARM_CCI5xx_PMU
+
+/*
+ * CCI-500/CCI-550 has advanced power saving policies, which could gate the
+ * clocks to the PMU counters, which makes the writes to them ineffective.
+ * The only way to write to those counters is when the global counters
+ * are enabled and the particular counter is enabled.
+ *
+ * So we do the following :
+ *
+ * 1) Disable all the PMU counters, saving their current state
+ * 2) Enable the global PMU profiling, now that all counters are
+ *    disabled.
+ *
+ * For each counter to be programmed, repeat steps 3-7:
+ *
+ * 3) Write an invalid event code to the event control register for the
+      counter, so that the counters are not modified.
+ * 4) Enable the counter control for the counter.
+ * 5) Set the counter value
+ * 6) Disable the counter
+ * 7) Restore the event in the target counter
+ *
+ * 8) Disable the global PMU.
+ * 9) Restore the status of the rest of the counters.
+ *
+ * We choose an event which for CCI-5xx is guaranteed not to count.
+ * We use the highest possible event code (0x1f) for the master interface 0.
+ */
+#define CCI5xx_INVALID_EVENT	((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \
+				 (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT))
+static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	int i;
+	DECLARE_BITMAP(saved_mask, HW_CNTRS_MAX);
+
+	bitmap_zero(saved_mask, cci_pmu->num_cntrs);
+	pmu_save_counters(cci_pmu, saved_mask);
+
+	/*
+	 * Now that all the counters are disabled, we can safely turn the PMU on,
+	 * without syncing the status of the counters
+	 */
+	__cci_pmu_enable_nosync(cci_pmu);
+
+	for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
+		struct perf_event *event = cci_pmu->hw_events.events[i];
+
+		if (WARN_ON(!event))
+			continue;
+
+		pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT);
+		pmu_enable_counter(cci_pmu, i);
+		pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
+		pmu_disable_counter(cci_pmu, i);
+		pmu_set_event(cci_pmu, i, event->hw.config_base);
+	}
+
+	__cci_pmu_disable(cci_pmu);
+
+	pmu_restore_counters(cci_pmu, saved_mask);
+}
+
+#endif	/* CONFIG_ARM_CCI5xx_PMU */
+
+static u64 pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+		 new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK;
+
+	local64_add(delta, &event->count);
+
+	return new_raw_count;
+}
+
+static void pmu_read(struct perf_event *event)
+{
+	pmu_event_update(event);
+}
+
+static void pmu_event_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	/*
+	 * The CCI PMU counters have a period of 2^32. To account for the
+	 * possiblity of extreme interrupt latency we program for a period of
+	 * half that. Hopefully we can handle the interrupt before another 2^31
+	 * events occur and the counter overtakes its previous value.
+	 */
+	u64 val = 1ULL << 31;
+	local64_set(&hwc->prev_count, val);
+
+	/*
+	 * CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose
+	 * values needs to be sync-ed with the s/w state before the PMU is
+	 * enabled.
+	 * Mark this counter for sync.
+	 */
+	hwc->state |= PERF_HES_ARCH;
+}
+
+static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
+{
+	struct cci_pmu *cci_pmu = dev;
+	struct cci_pmu_hw_events *events = &cci_pmu->hw_events;
+	int idx, handled = IRQ_NONE;
+
+	raw_spin_lock(&events->pmu_lock);
+
+	/* Disable the PMU while we walk through the counters */
+	__cci_pmu_disable(cci_pmu);
+	/*
+	 * Iterate over counters and update the corresponding perf events.
+	 * This should work regardless of whether we have per-counter overflow
+	 * interrupt or a combined overflow interrupt.
+	 */
+	for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
+		struct perf_event *event = events->events[idx];
+
+		if (!event)
+			continue;
+
+		/* Did this counter overflow? */
+		if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) &
+		      CCI_PMU_OVRFLW_FLAG))
+			continue;
+
+		pmu_write_register(cci_pmu, CCI_PMU_OVRFLW_FLAG, idx,
+							CCI_PMU_OVRFLW);
+
+		pmu_event_update(event);
+		pmu_event_set_period(event);
+		handled = IRQ_HANDLED;
+	}
+
+	/* Enable the PMU and sync possibly overflowed counters */
+	__cci_pmu_enable_sync(cci_pmu);
+	raw_spin_unlock(&events->pmu_lock);
+
+	return IRQ_RETVAL(handled);
+}
+
+static int cci_pmu_get_hw(struct cci_pmu *cci_pmu)
+{
+	int ret = pmu_request_irq(cci_pmu, pmu_handle_irq);
+	if (ret) {
+		pmu_free_irq(cci_pmu);
+		return ret;
+	}
+	return 0;
+}
+
+static void cci_pmu_put_hw(struct cci_pmu *cci_pmu)
+{
+	pmu_free_irq(cci_pmu);
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	atomic_t *active_events = &cci_pmu->active_events;
+	struct mutex *reserve_mutex = &cci_pmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) {
+		cci_pmu_put_hw(cci_pmu);
+		mutex_unlock(reserve_mutex);
+	}
+}
+
+static void cci_pmu_enable(struct pmu *pmu)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	bool enabled = !bitmap_empty(hw_events->used_mask, cci_pmu->num_cntrs);
+	unsigned long flags;
+
+	if (!enabled)
+		return;
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+	__cci_pmu_enable_sync(cci_pmu);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+
+}
+
+static void cci_pmu_disable(struct pmu *pmu)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+	__cci_pmu_disable(cci_pmu);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+}
+
+/*
+ * Check if the idx represents a non-programmable counter.
+ * All the fixed event counters are mapped before the programmable
+ * counters.
+ */
+static bool pmu_fixed_hw_idx(struct cci_pmu *cci_pmu, int idx)
+{
+	return (idx >= 0) && (idx < cci_pmu->model->fixed_hw_cntrs);
+}
+
+static void cci_pmu_start(struct perf_event *event, int pmu_flags)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	unsigned long flags;
+
+	/*
+	 * To handle interrupt latency, we always reprogram the period
+	 * regardless of PERF_EF_RELOAD.
+	 */
+	if (pmu_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	/* Configure the counter unless you are counting a fixed event */
+	if (!pmu_fixed_hw_idx(cci_pmu, idx))
+		pmu_set_event(cci_pmu, idx, hwc->config_base);
+
+	pmu_event_set_period(event);
+	pmu_enable_counter(cci_pmu, idx);
+
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+}
+
+static void cci_pmu_stop(struct perf_event *event, int pmu_flags)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return;
+	}
+
+	/*
+	 * We always reprogram the counter, so ignore PERF_EF_UPDATE. See
+	 * cci_pmu_start()
+	 */
+	pmu_disable_counter(cci_pmu, idx);
+	pmu_event_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int cci_pmu_add(struct perf_event *event, int flags)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	/* If we don't have a space for the counter then finish early. */
+	idx = pmu_get_event_idx(hw_events, event);
+	if (idx < 0)
+		return idx;
+
+	event->hw.idx = idx;
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		cci_pmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void cci_pmu_del(struct perf_event *event, int flags)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	cci_pmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static int validate_event(struct pmu *cci_pmu,
+			  struct cci_pmu_hw_events *hw_events,
+			  struct perf_event *event)
+{
+	if (is_software_event(event))
+		return 1;
+
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != cci_pmu)
+		return 0;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	return pmu_get_event_idx(hw_events, event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	unsigned long mask[BITS_TO_LONGS(HW_CNTRS_MAX)];
+	struct cci_pmu_hw_events fake_pmu = {
+		/*
+		 * Initialise the fake PMU. We only need to populate the
+		 * used_mask for the purposes of validation.
+		 */
+		.used_mask = mask,
+	};
+	bitmap_zero(mask, cci_pmu->num_cntrs);
+
+	if (!validate_event(event->pmu, &fake_pmu, leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
+			return -EINVAL;
+	}
+
+	if (!validate_event(event->pmu, &fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
+
+	mapping = pmu_map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base	    |= (unsigned long)mapping;
+
+	if (event->group_leader != event) {
+		if (validate_group(event) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cci_pmu_event_init(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	atomic_t *active_events = &cci_pmu->active_events;
+	int err = 0;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Shared by all CPUs, no meaningful state to sample */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Following the example set by other "uncore" PMUs, we accept any CPU
+	 * and rewrite its affinity dynamically rather than having perf core
+	 * handle cpu == -1 and pid == -1 for this case.
+	 *
+	 * The perf core will pin online CPUs for the duration of this call and
+	 * the event being installed into its context, so the PMU's CPU can't
+	 * change under our feet.
+	 */
+	if (event->cpu < 0)
+		return -EINVAL;
+	event->cpu = cci_pmu->cpu;
+
+	event->destroy = hw_perf_event_destroy;
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&cci_pmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = cci_pmu_get_hw(cci_pmu);
+		if (!err)
+			atomic_inc(active_events);
+		mutex_unlock(&cci_pmu->reserve_mutex);
+	}
+	if (err)
+		return err;
+
+	err = __hw_perf_event_init(event);
+	if (err)
+		hw_perf_event_destroy(event);
+
+	return err;
+}
+
+static ssize_t pmu_cpumask_attr_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(cci_pmu->cpu));
+}
+
+static struct device_attribute pmu_cpumask_attr =
+	__ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL);
+
+static struct attribute *pmu_attrs[] = {
+	&pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group pmu_attr_group = {
+	.attrs = pmu_attrs,
+};
+
+static struct attribute_group pmu_format_attr_group = {
+	.name = "format",
+	.attrs = NULL,		/* Filled in cci_pmu_init_attrs */
+};
+
+static struct attribute_group pmu_event_attr_group = {
+	.name = "events",
+	.attrs = NULL,		/* Filled in cci_pmu_init_attrs */
+};
+
+static const struct attribute_group *pmu_attr_groups[] = {
+	&pmu_attr_group,
+	&pmu_format_attr_group,
+	&pmu_event_attr_group,
+	NULL
+};
+
+static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
+{
+	const struct cci_pmu_model *model = cci_pmu->model;
+	char *name = model->name;
+	u32 num_cntrs;
+
+	if (WARN_ON(model->num_hw_cntrs > NUM_HW_CNTRS_MAX))
+		return -EINVAL;
+	if (WARN_ON(model->fixed_hw_cntrs > FIXED_HW_CNTRS_MAX))
+		return -EINVAL;
+
+	pmu_event_attr_group.attrs = model->event_attrs;
+	pmu_format_attr_group.attrs = model->format_attrs;
+
+	cci_pmu->pmu = (struct pmu) {
+		.module		= THIS_MODULE,
+		.name		= cci_pmu->model->name,
+		.task_ctx_nr	= perf_invalid_context,
+		.pmu_enable	= cci_pmu_enable,
+		.pmu_disable	= cci_pmu_disable,
+		.event_init	= cci_pmu_event_init,
+		.add		= cci_pmu_add,
+		.del		= cci_pmu_del,
+		.start		= cci_pmu_start,
+		.stop		= cci_pmu_stop,
+		.read		= pmu_read,
+		.attr_groups	= pmu_attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	cci_pmu->plat_device = pdev;
+	num_cntrs = pmu_get_max_counters(cci_pmu);
+	if (num_cntrs > cci_pmu->model->num_hw_cntrs) {
+		dev_warn(&pdev->dev,
+			"PMU implements more counters(%d) than supported by"
+			" the model(%d), truncated.",
+			num_cntrs, cci_pmu->model->num_hw_cntrs);
+		num_cntrs = cci_pmu->model->num_hw_cntrs;
+	}
+	cci_pmu->num_cntrs = num_cntrs + cci_pmu->model->fixed_hw_cntrs;
+
+	return perf_pmu_register(&cci_pmu->pmu, name, -1);
+}
+
+static int cci_pmu_offline_cpu(unsigned int cpu)
+{
+	int target;
+
+	if (!g_cci_pmu || cpu != g_cci_pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&g_cci_pmu->pmu, cpu, target);
+	g_cci_pmu->cpu = target;
+	return 0;
+}
+
+static __maybe_unused struct cci_pmu_model cci_pmu_models[] = {
+#ifdef CONFIG_ARM_CCI400_PMU
+	[CCI400_R0] = {
+		.name = "CCI_400",
+		.fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */
+		.num_hw_cntrs = NUM_HW_CNTRS_CII_4XX,
+		.cntr_size = SZ_4K,
+		.format_attrs = cci400_pmu_format_attrs,
+		.event_attrs = cci400_r0_pmu_event_attrs,
+		.event_ranges = {
+			[CCI_IF_SLAVE] = {
+				CCI400_R0_SLAVE_PORT_MIN_EV,
+				CCI400_R0_SLAVE_PORT_MAX_EV,
+			},
+			[CCI_IF_MASTER] = {
+				CCI400_R0_MASTER_PORT_MIN_EV,
+				CCI400_R0_MASTER_PORT_MAX_EV,
+			},
+		},
+		.validate_hw_event = cci400_validate_hw_event,
+		.get_event_idx = cci400_get_event_idx,
+	},
+	[CCI400_R1] = {
+		.name = "CCI_400_r1",
+		.fixed_hw_cntrs = FIXED_HW_CNTRS_CII_4XX, /* Cycle counter */
+		.num_hw_cntrs = NUM_HW_CNTRS_CII_4XX,
+		.cntr_size = SZ_4K,
+		.format_attrs = cci400_pmu_format_attrs,
+		.event_attrs = cci400_r1_pmu_event_attrs,
+		.event_ranges = {
+			[CCI_IF_SLAVE] = {
+				CCI400_R1_SLAVE_PORT_MIN_EV,
+				CCI400_R1_SLAVE_PORT_MAX_EV,
+			},
+			[CCI_IF_MASTER] = {
+				CCI400_R1_MASTER_PORT_MIN_EV,
+				CCI400_R1_MASTER_PORT_MAX_EV,
+			},
+		},
+		.validate_hw_event = cci400_validate_hw_event,
+		.get_event_idx = cci400_get_event_idx,
+	},
+#endif
+#ifdef CONFIG_ARM_CCI5xx_PMU
+	[CCI500_R0] = {
+		.name = "CCI_500",
+		.fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX,
+		.num_hw_cntrs = NUM_HW_CNTRS_CII_5XX,
+		.cntr_size = SZ_64K,
+		.format_attrs = cci5xx_pmu_format_attrs,
+		.event_attrs = cci5xx_pmu_event_attrs,
+		.event_ranges = {
+			[CCI_IF_SLAVE] = {
+				CCI5xx_SLAVE_PORT_MIN_EV,
+				CCI5xx_SLAVE_PORT_MAX_EV,
+			},
+			[CCI_IF_MASTER] = {
+				CCI5xx_MASTER_PORT_MIN_EV,
+				CCI5xx_MASTER_PORT_MAX_EV,
+			},
+			[CCI_IF_GLOBAL] = {
+				CCI5xx_GLOBAL_PORT_MIN_EV,
+				CCI5xx_GLOBAL_PORT_MAX_EV,
+			},
+		},
+		.validate_hw_event = cci500_validate_hw_event,
+		.write_counters	= cci5xx_pmu_write_counters,
+	},
+	[CCI550_R0] = {
+		.name = "CCI_550",
+		.fixed_hw_cntrs = FIXED_HW_CNTRS_CII_5XX,
+		.num_hw_cntrs = NUM_HW_CNTRS_CII_5XX,
+		.cntr_size = SZ_64K,
+		.format_attrs = cci5xx_pmu_format_attrs,
+		.event_attrs = cci5xx_pmu_event_attrs,
+		.event_ranges = {
+			[CCI_IF_SLAVE] = {
+				CCI5xx_SLAVE_PORT_MIN_EV,
+				CCI5xx_SLAVE_PORT_MAX_EV,
+			},
+			[CCI_IF_MASTER] = {
+				CCI5xx_MASTER_PORT_MIN_EV,
+				CCI5xx_MASTER_PORT_MAX_EV,
+			},
+			[CCI_IF_GLOBAL] = {
+				CCI5xx_GLOBAL_PORT_MIN_EV,
+				CCI5xx_GLOBAL_PORT_MAX_EV,
+			},
+		},
+		.validate_hw_event = cci550_validate_hw_event,
+		.write_counters	= cci5xx_pmu_write_counters,
+	},
+#endif
+};
+
+static const struct of_device_id arm_cci_pmu_matches[] = {
+#ifdef CONFIG_ARM_CCI400_PMU
+	{
+		.compatible = "arm,cci-400-pmu",
+		.data	= NULL,
+	},
+	{
+		.compatible = "arm,cci-400-pmu,r0",
+		.data	= &cci_pmu_models[CCI400_R0],
+	},
+	{
+		.compatible = "arm,cci-400-pmu,r1",
+		.data	= &cci_pmu_models[CCI400_R1],
+	},
+#endif
+#ifdef CONFIG_ARM_CCI5xx_PMU
+	{
+		.compatible = "arm,cci-500-pmu,r0",
+		.data = &cci_pmu_models[CCI500_R0],
+	},
+	{
+		.compatible = "arm,cci-550-pmu,r0",
+		.data = &cci_pmu_models[CCI550_R0],
+	},
+#endif
+	{},
+};
+MODULE_DEVICE_TABLE(of, arm_cci_pmu_matches);
+
+static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++)
+		if (irq == irqs[i])
+			return true;
+
+	return false;
+}
+
+static struct cci_pmu *cci_pmu_alloc(struct device *dev)
+{
+	struct cci_pmu *cci_pmu;
+	const struct cci_pmu_model *model;
+
+	/*
+	 * All allocations are devm_* hence we don't have to free
+	 * them explicitly on an error, as it would end up in driver
+	 * detach.
+	 */
+	cci_pmu = devm_kzalloc(dev, sizeof(*cci_pmu), GFP_KERNEL);
+	if (!cci_pmu)
+		return ERR_PTR(-ENOMEM);
+
+	cci_pmu->ctrl_base = *(void __iomem **)dev->platform_data;
+
+	model = of_device_get_match_data(dev);
+	if (!model) {
+		dev_warn(dev,
+			 "DEPRECATED compatible property, requires secure access to CCI registers");
+		model = probe_cci_model(cci_pmu);
+	}
+	if (!model) {
+		dev_warn(dev, "CCI PMU version not supported\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	cci_pmu->model = model;
+	cci_pmu->irqs = devm_kcalloc(dev, CCI_PMU_MAX_HW_CNTRS(model),
+					sizeof(*cci_pmu->irqs), GFP_KERNEL);
+	if (!cci_pmu->irqs)
+		return ERR_PTR(-ENOMEM);
+	cci_pmu->hw_events.events = devm_kcalloc(dev,
+					     CCI_PMU_MAX_HW_CNTRS(model),
+					     sizeof(*cci_pmu->hw_events.events),
+					     GFP_KERNEL);
+	if (!cci_pmu->hw_events.events)
+		return ERR_PTR(-ENOMEM);
+	cci_pmu->hw_events.used_mask = devm_bitmap_zalloc(dev,
+							  CCI_PMU_MAX_HW_CNTRS(model),
+							  GFP_KERNEL);
+	if (!cci_pmu->hw_events.used_mask)
+		return ERR_PTR(-ENOMEM);
+
+	return cci_pmu;
+}
+
+static int cci_pmu_probe(struct platform_device *pdev)
+{
+	struct cci_pmu *cci_pmu;
+	int i, ret, irq;
+
+	cci_pmu = cci_pmu_alloc(&pdev->dev);
+	if (IS_ERR(cci_pmu))
+		return PTR_ERR(cci_pmu);
+
+	cci_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(cci_pmu->base))
+		return -ENOMEM;
+
+	/*
+	 * CCI PMU has one overflow interrupt per counter; but some may be tied
+	 * together to a common interrupt.
+	 */
+	cci_pmu->nr_irqs = 0;
+	for (i = 0; i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model); i++) {
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0)
+			break;
+
+		if (is_duplicate_irq(irq, cci_pmu->irqs, cci_pmu->nr_irqs))
+			continue;
+
+		cci_pmu->irqs[cci_pmu->nr_irqs++] = irq;
+	}
+
+	/*
+	 * Ensure that the device tree has as many interrupts as the number
+	 * of counters.
+	 */
+	if (i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)) {
+		dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n",
+			i, CCI_PMU_MAX_HW_CNTRS(cci_pmu->model));
+		return -EINVAL;
+	}
+
+	raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock);
+	mutex_init(&cci_pmu->reserve_mutex);
+	atomic_set(&cci_pmu->active_events, 0);
+
+	cci_pmu->cpu = raw_smp_processor_id();
+	g_cci_pmu = cci_pmu;
+	cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
+				  "perf/arm/cci:online", NULL,
+				  cci_pmu_offline_cpu);
+
+	ret = cci_pmu_init(cci_pmu, pdev);
+	if (ret)
+		goto error_pmu_init;
+
+	pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
+	return 0;
+
+error_pmu_init:
+	cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE);
+	g_cci_pmu = NULL;
+	return ret;
+}
+
+static int cci_pmu_remove(struct platform_device *pdev)
+{
+	if (!g_cci_pmu)
+		return 0;
+
+	cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE);
+	perf_pmu_unregister(&g_cci_pmu->pmu);
+	g_cci_pmu = NULL;
+
+	return 0;
+}
+
+static struct platform_driver cci_pmu_driver = {
+	.driver = {
+		   .name = DRIVER_NAME,
+		   .of_match_table = arm_cci_pmu_matches,
+		   .suppress_bind_attrs = true,
+		  },
+	.probe = cci_pmu_probe,
+	.remove = cci_pmu_remove,
+};
+
+module_platform_driver(cci_pmu_driver);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ARM CCI PMU support");
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
new file mode 100644
index 0000000000..728d13d8e9
--- /dev/null
+++ b/drivers/perf/arm-ccn.c
@@ -0,0 +1,1574 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2014 ARM Limited
+ */
+
+#include <linux/ctype.h>
+#include <linux/hrtimer.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define CCN_NUM_XP_PORTS 2
+#define CCN_NUM_VCS 4
+#define CCN_NUM_REGIONS	256
+#define CCN_REGION_SIZE	0x10000
+
+#define CCN_ALL_OLY_ID			0xff00
+#define CCN_ALL_OLY_ID__OLY_ID__SHIFT			0
+#define CCN_ALL_OLY_ID__OLY_ID__MASK			0x1f
+#define CCN_ALL_OLY_ID__NODE_ID__SHIFT			8
+#define CCN_ALL_OLY_ID__NODE_ID__MASK			0x3f
+
+#define CCN_MN_ERRINT_STATUS		0x0008
+#define CCN_MN_ERRINT_STATUS__INTREQ__DESSERT		0x11
+#define CCN_MN_ERRINT_STATUS__ALL_ERRORS__ENABLE	0x02
+#define CCN_MN_ERRINT_STATUS__ALL_ERRORS__DISABLED	0x20
+#define CCN_MN_ERRINT_STATUS__ALL_ERRORS__DISABLE	0x22
+#define CCN_MN_ERRINT_STATUS__CORRECTED_ERRORS_ENABLE	0x04
+#define CCN_MN_ERRINT_STATUS__CORRECTED_ERRORS_DISABLED	0x40
+#define CCN_MN_ERRINT_STATUS__CORRECTED_ERRORS_DISABLE	0x44
+#define CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE	0x08
+#define CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLED	0x80
+#define CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE	0x88
+#define CCN_MN_OLY_COMP_LIST_63_0	0x01e0
+#define CCN_MN_ERR_SIG_VAL_63_0		0x0300
+#define CCN_MN_ERR_SIG_VAL_63_0__DT			(1 << 1)
+
+#define CCN_DT_ACTIVE_DSM		0x0000
+#define CCN_DT_ACTIVE_DSM__DSM_ID__SHIFT(n)		((n) * 8)
+#define CCN_DT_ACTIVE_DSM__DSM_ID__MASK			0xff
+#define CCN_DT_CTL			0x0028
+#define CCN_DT_CTL__DT_EN				(1 << 0)
+#define CCN_DT_PMEVCNT(n)		(0x0100 + (n) * 0x8)
+#define CCN_DT_PMCCNTR			0x0140
+#define CCN_DT_PMCCNTRSR		0x0190
+#define CCN_DT_PMOVSR			0x0198
+#define CCN_DT_PMOVSR_CLR		0x01a0
+#define CCN_DT_PMOVSR_CLR__MASK				0x1f
+#define CCN_DT_PMCR			0x01a8
+#define CCN_DT_PMCR__OVFL_INTR_EN			(1 << 6)
+#define CCN_DT_PMCR__PMU_EN				(1 << 0)
+#define CCN_DT_PMSR			0x01b0
+#define CCN_DT_PMSR_REQ			0x01b8
+#define CCN_DT_PMSR_CLR			0x01c0
+
+#define CCN_HNF_PMU_EVENT_SEL		0x0600
+#define CCN_HNF_PMU_EVENT_SEL__ID__SHIFT(n)		((n) * 4)
+#define CCN_HNF_PMU_EVENT_SEL__ID__MASK			0xf
+
+#define CCN_XP_DT_CONFIG		0x0300
+#define CCN_XP_DT_CONFIG__DT_CFG__SHIFT(n)		((n) * 4)
+#define CCN_XP_DT_CONFIG__DT_CFG__MASK			0xf
+#define CCN_XP_DT_CONFIG__DT_CFG__PASS_THROUGH		0x0
+#define CCN_XP_DT_CONFIG__DT_CFG__WATCHPOINT_0_OR_1	0x1
+#define CCN_XP_DT_CONFIG__DT_CFG__WATCHPOINT(n)		(0x2 + (n))
+#define CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(n)	(0x4 + (n))
+#define CCN_XP_DT_CONFIG__DT_CFG__DEVICE_PMU_EVENT(d, n) (0x8 + (d) * 4 + (n))
+#define CCN_XP_DT_INTERFACE_SEL		0x0308
+#define CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__SHIFT(n)	(0 + (n) * 8)
+#define CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__MASK	0x1
+#define CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__SHIFT(n)	(1 + (n) * 8)
+#define CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__MASK	0x1
+#define CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__SHIFT(n)	(2 + (n) * 8)
+#define CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__MASK	0x3
+#define CCN_XP_DT_CMP_VAL_L(n)		(0x0310 + (n) * 0x40)
+#define CCN_XP_DT_CMP_VAL_H(n)		(0x0318 + (n) * 0x40)
+#define CCN_XP_DT_CMP_MASK_L(n)		(0x0320 + (n) * 0x40)
+#define CCN_XP_DT_CMP_MASK_H(n)		(0x0328 + (n) * 0x40)
+#define CCN_XP_DT_CONTROL		0x0370
+#define CCN_XP_DT_CONTROL__DT_ENABLE			(1 << 0)
+#define CCN_XP_DT_CONTROL__WP_ARM_SEL__SHIFT(n)		(12 + (n) * 4)
+#define CCN_XP_DT_CONTROL__WP_ARM_SEL__MASK		0xf
+#define CCN_XP_DT_CONTROL__WP_ARM_SEL__ALWAYS		0xf
+#define CCN_XP_PMU_EVENT_SEL		0x0600
+#define CCN_XP_PMU_EVENT_SEL__ID__SHIFT(n)		((n) * 7)
+#define CCN_XP_PMU_EVENT_SEL__ID__MASK			0x3f
+
+#define CCN_SBAS_PMU_EVENT_SEL		0x0600
+#define CCN_SBAS_PMU_EVENT_SEL__ID__SHIFT(n)		((n) * 4)
+#define CCN_SBAS_PMU_EVENT_SEL__ID__MASK		0xf
+
+#define CCN_RNI_PMU_EVENT_SEL		0x0600
+#define CCN_RNI_PMU_EVENT_SEL__ID__SHIFT(n)		((n) * 4)
+#define CCN_RNI_PMU_EVENT_SEL__ID__MASK			0xf
+
+#define CCN_TYPE_MN	0x01
+#define CCN_TYPE_DT	0x02
+#define CCN_TYPE_HNF	0x04
+#define CCN_TYPE_HNI	0x05
+#define CCN_TYPE_XP	0x08
+#define CCN_TYPE_SBSX	0x0c
+#define CCN_TYPE_SBAS	0x10
+#define CCN_TYPE_RNI_1P	0x14
+#define CCN_TYPE_RNI_2P	0x15
+#define CCN_TYPE_RNI_3P	0x16
+#define CCN_TYPE_RND_1P	0x18 /* RN-D = RN-I + DVM */
+#define CCN_TYPE_RND_2P	0x19
+#define CCN_TYPE_RND_3P	0x1a
+#define CCN_TYPE_CYCLES	0xff /* Pseudotype */
+
+#define CCN_EVENT_WATCHPOINT 0xfe /* Pseudoevent */
+
+#define CCN_NUM_PMU_EVENTS		4
+#define CCN_NUM_XP_WATCHPOINTS		2 /* See DT.dbg_id.num_watchpoints */
+#define CCN_NUM_PMU_EVENT_COUNTERS	8 /* See DT.dbg_id.num_pmucntr */
+#define CCN_IDX_PMU_CYCLE_COUNTER	CCN_NUM_PMU_EVENT_COUNTERS
+
+#define CCN_NUM_PREDEFINED_MASKS	4
+#define CCN_IDX_MASK_ANY		(CCN_NUM_PMU_EVENT_COUNTERS + 0)
+#define CCN_IDX_MASK_EXACT		(CCN_NUM_PMU_EVENT_COUNTERS + 1)
+#define CCN_IDX_MASK_ORDER		(CCN_NUM_PMU_EVENT_COUNTERS + 2)
+#define CCN_IDX_MASK_OPCODE		(CCN_NUM_PMU_EVENT_COUNTERS + 3)
+
+struct arm_ccn_component {
+	void __iomem *base;
+	u32 type;
+
+	DECLARE_BITMAP(pmu_events_mask, CCN_NUM_PMU_EVENTS);
+	union {
+		struct {
+			DECLARE_BITMAP(dt_cmp_mask, CCN_NUM_XP_WATCHPOINTS);
+		} xp;
+	};
+};
+
+#define pmu_to_arm_ccn(_pmu) container_of(container_of(_pmu, \
+	struct arm_ccn_dt, pmu), struct arm_ccn, dt)
+
+struct arm_ccn_dt {
+	int id;
+	void __iomem *base;
+
+	spinlock_t config_lock;
+
+	DECLARE_BITMAP(pmu_counters_mask, CCN_NUM_PMU_EVENT_COUNTERS + 1);
+	struct {
+		struct arm_ccn_component *source;
+		struct perf_event *event;
+	} pmu_counters[CCN_NUM_PMU_EVENT_COUNTERS + 1];
+
+	struct {
+	       u64 l, h;
+	} cmp_mask[CCN_NUM_PMU_EVENT_COUNTERS + CCN_NUM_PREDEFINED_MASKS];
+
+	struct hrtimer hrtimer;
+
+	unsigned int cpu;
+	struct hlist_node node;
+
+	struct pmu pmu;
+};
+
+struct arm_ccn {
+	struct device *dev;
+	void __iomem *base;
+	unsigned int irq;
+
+	unsigned sbas_present:1;
+	unsigned sbsx_present:1;
+
+	int num_nodes;
+	struct arm_ccn_component *node;
+
+	int num_xps;
+	struct arm_ccn_component *xp;
+
+	struct arm_ccn_dt dt;
+	int mn_id;
+};
+
+static int arm_ccn_node_to_xp(int node)
+{
+	return node / CCN_NUM_XP_PORTS;
+}
+
+static int arm_ccn_node_to_xp_port(int node)
+{
+	return node % CCN_NUM_XP_PORTS;
+}
+
+
+/*
+ * Bit shifts and masks in these defines must be kept in sync with
+ * arm_ccn_pmu_config_set() and CCN_FORMAT_ATTRs below!
+ */
+#define CCN_CONFIG_NODE(_config)	(((_config) >> 0) & 0xff)
+#define CCN_CONFIG_XP(_config)		(((_config) >> 0) & 0xff)
+#define CCN_CONFIG_TYPE(_config)	(((_config) >> 8) & 0xff)
+#define CCN_CONFIG_EVENT(_config)	(((_config) >> 16) & 0xff)
+#define CCN_CONFIG_PORT(_config)	(((_config) >> 24) & 0x3)
+#define CCN_CONFIG_BUS(_config)		(((_config) >> 24) & 0x3)
+#define CCN_CONFIG_VC(_config)		(((_config) >> 26) & 0x7)
+#define CCN_CONFIG_DIR(_config)		(((_config) >> 29) & 0x1)
+#define CCN_CONFIG_MASK(_config)	(((_config) >> 30) & 0xf)
+
+static void arm_ccn_pmu_config_set(u64 *config, u32 node_xp, u32 type, u32 port)
+{
+	*config &= ~((0xff << 0) | (0xff << 8) | (0x3 << 24));
+	*config |= (node_xp << 0) | (type << 8) | (port << 24);
+}
+
+static ssize_t arm_ccn_pmu_format_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *ea = container_of(attr,
+			struct dev_ext_attribute, attr);
+
+	return sysfs_emit(buf, "%s\n", (char *)ea->var);
+}
+
+#define CCN_FORMAT_ATTR(_name, _config) \
+	struct dev_ext_attribute arm_ccn_pmu_format_attr_##_name = \
+			{ __ATTR(_name, S_IRUGO, arm_ccn_pmu_format_show, \
+			NULL), _config }
+
+static CCN_FORMAT_ATTR(node, "config:0-7");
+static CCN_FORMAT_ATTR(xp, "config:0-7");
+static CCN_FORMAT_ATTR(type, "config:8-15");
+static CCN_FORMAT_ATTR(event, "config:16-23");
+static CCN_FORMAT_ATTR(port, "config:24-25");
+static CCN_FORMAT_ATTR(bus, "config:24-25");
+static CCN_FORMAT_ATTR(vc, "config:26-28");
+static CCN_FORMAT_ATTR(dir, "config:29-29");
+static CCN_FORMAT_ATTR(mask, "config:30-33");
+static CCN_FORMAT_ATTR(cmp_l, "config1:0-62");
+static CCN_FORMAT_ATTR(cmp_h, "config2:0-59");
+
+static struct attribute *arm_ccn_pmu_format_attrs[] = {
+	&arm_ccn_pmu_format_attr_node.attr.attr,
+	&arm_ccn_pmu_format_attr_xp.attr.attr,
+	&arm_ccn_pmu_format_attr_type.attr.attr,
+	&arm_ccn_pmu_format_attr_event.attr.attr,
+	&arm_ccn_pmu_format_attr_port.attr.attr,
+	&arm_ccn_pmu_format_attr_bus.attr.attr,
+	&arm_ccn_pmu_format_attr_vc.attr.attr,
+	&arm_ccn_pmu_format_attr_dir.attr.attr,
+	&arm_ccn_pmu_format_attr_mask.attr.attr,
+	&arm_ccn_pmu_format_attr_cmp_l.attr.attr,
+	&arm_ccn_pmu_format_attr_cmp_h.attr.attr,
+	NULL
+};
+
+static const struct attribute_group arm_ccn_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = arm_ccn_pmu_format_attrs,
+};
+
+
+struct arm_ccn_pmu_event {
+	struct device_attribute attr;
+	u32 type;
+	u32 event;
+	int num_ports;
+	int num_vcs;
+	const char *def;
+	int mask;
+};
+
+#define CCN_EVENT_ATTR(_name) \
+	__ATTR(_name, S_IRUGO, arm_ccn_pmu_event_show, NULL)
+
+/*
+ * Events defined in TRM for MN, HN-I and SBSX are actually watchpoints set on
+ * their ports in XP they are connected to. For the sake of usability they are
+ * explicitly defined here (and translated into a relevant watchpoint in
+ * arm_ccn_pmu_event_init()) so the user can easily request them without deep
+ * knowledge of the flit format.
+ */
+
+#define CCN_EVENT_MN(_name, _def, _mask) { .attr = CCN_EVENT_ATTR(mn_##_name), \
+		.type = CCN_TYPE_MN, .event = CCN_EVENT_WATCHPOINT, \
+		.num_ports = CCN_NUM_XP_PORTS, .num_vcs = CCN_NUM_VCS, \
+		.def = _def, .mask = _mask, }
+
+#define CCN_EVENT_HNI(_name, _def, _mask) { \
+		.attr = CCN_EVENT_ATTR(hni_##_name), .type = CCN_TYPE_HNI, \
+		.event = CCN_EVENT_WATCHPOINT, .num_ports = CCN_NUM_XP_PORTS, \
+		.num_vcs = CCN_NUM_VCS, .def = _def, .mask = _mask, }
+
+#define CCN_EVENT_SBSX(_name, _def, _mask) { \
+		.attr = CCN_EVENT_ATTR(sbsx_##_name), .type = CCN_TYPE_SBSX, \
+		.event = CCN_EVENT_WATCHPOINT, .num_ports = CCN_NUM_XP_PORTS, \
+		.num_vcs = CCN_NUM_VCS, .def = _def, .mask = _mask, }
+
+#define CCN_EVENT_HNF(_name, _event) { .attr = CCN_EVENT_ATTR(hnf_##_name), \
+		.type = CCN_TYPE_HNF, .event = _event, }
+
+#define CCN_EVENT_XP(_name, _event) { .attr = CCN_EVENT_ATTR(xp_##_name), \
+		.type = CCN_TYPE_XP, .event = _event, \
+		.num_ports = CCN_NUM_XP_PORTS, .num_vcs = CCN_NUM_VCS, }
+
+/*
+ * RN-I & RN-D (RN-D = RN-I + DVM) nodes have different type ID depending
+ * on configuration. One of them is picked to represent the whole group,
+ * as they all share the same event types.
+ */
+#define CCN_EVENT_RNI(_name, _event) { .attr = CCN_EVENT_ATTR(rni_##_name), \
+		.type = CCN_TYPE_RNI_3P, .event = _event, }
+
+#define CCN_EVENT_SBAS(_name, _event) { .attr = CCN_EVENT_ATTR(sbas_##_name), \
+		.type = CCN_TYPE_SBAS, .event = _event, }
+
+#define CCN_EVENT_CYCLES(_name) { .attr = CCN_EVENT_ATTR(_name), \
+		.type = CCN_TYPE_CYCLES }
+
+
+static ssize_t arm_ccn_pmu_event_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
+	struct arm_ccn_pmu_event *event = container_of(attr,
+			struct arm_ccn_pmu_event, attr);
+	int res;
+
+	res = sysfs_emit(buf, "type=0x%x", event->type);
+	if (event->event)
+		res += sysfs_emit_at(buf, res, ",event=0x%x", event->event);
+	if (event->def)
+		res += sysfs_emit_at(buf, res, ",%s", event->def);
+	if (event->mask)
+		res += sysfs_emit_at(buf, res, ",mask=0x%x", event->mask);
+
+	/* Arguments required by an event */
+	switch (event->type) {
+	case CCN_TYPE_CYCLES:
+		break;
+	case CCN_TYPE_XP:
+		res += sysfs_emit_at(buf, res, ",xp=?,vc=?");
+		if (event->event == CCN_EVENT_WATCHPOINT)
+			res += sysfs_emit_at(buf, res,
+					",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?");
+		else
+			res += sysfs_emit_at(buf, res, ",bus=?");
+
+		break;
+	case CCN_TYPE_MN:
+		res += sysfs_emit_at(buf, res, ",node=%d", ccn->mn_id);
+		break;
+	default:
+		res += sysfs_emit_at(buf, res, ",node=?");
+		break;
+	}
+
+	res += sysfs_emit_at(buf, res, "\n");
+
+	return res;
+}
+
+static umode_t arm_ccn_pmu_events_is_visible(struct kobject *kobj,
+				     struct attribute *attr, int index)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
+	struct device_attribute *dev_attr = container_of(attr,
+			struct device_attribute, attr);
+	struct arm_ccn_pmu_event *event = container_of(dev_attr,
+			struct arm_ccn_pmu_event, attr);
+
+	if (event->type == CCN_TYPE_SBAS && !ccn->sbas_present)
+		return 0;
+	if (event->type == CCN_TYPE_SBSX && !ccn->sbsx_present)
+		return 0;
+
+	return attr->mode;
+}
+
+static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = {
+	CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY),
+	CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY),
+	CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY),
+	CCN_EVENT_HNI(rxreqflits, "dir=0,vc=0", CCN_IDX_MASK_ANY),
+	CCN_EVENT_HNI(rxreqflits_order, "dir=0,vc=0,cmp_h=0x8000",
+			CCN_IDX_MASK_ORDER),
+	CCN_EVENT_SBSX(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY),
+	CCN_EVENT_SBSX(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY),
+	CCN_EVENT_SBSX(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY),
+	CCN_EVENT_SBSX(rxreqflits, "dir=0,vc=0", CCN_IDX_MASK_ANY),
+	CCN_EVENT_SBSX(rxreqflits_order, "dir=0,vc=0,cmp_h=0x8000",
+			CCN_IDX_MASK_ORDER),
+	CCN_EVENT_HNF(cache_miss, 0x1),
+	CCN_EVENT_HNF(l3_sf_cache_access, 0x02),
+	CCN_EVENT_HNF(cache_fill, 0x3),
+	CCN_EVENT_HNF(pocq_retry, 0x4),
+	CCN_EVENT_HNF(pocq_reqs_recvd, 0x5),
+	CCN_EVENT_HNF(sf_hit, 0x6),
+	CCN_EVENT_HNF(sf_evictions, 0x7),
+	CCN_EVENT_HNF(snoops_sent, 0x8),
+	CCN_EVENT_HNF(snoops_broadcast, 0x9),
+	CCN_EVENT_HNF(l3_eviction, 0xa),
+	CCN_EVENT_HNF(l3_fill_invalid_way, 0xb),
+	CCN_EVENT_HNF(mc_retries, 0xc),
+	CCN_EVENT_HNF(mc_reqs, 0xd),
+	CCN_EVENT_HNF(qos_hh_retry, 0xe),
+	CCN_EVENT_RNI(rdata_beats_p0, 0x1),
+	CCN_EVENT_RNI(rdata_beats_p1, 0x2),
+	CCN_EVENT_RNI(rdata_beats_p2, 0x3),
+	CCN_EVENT_RNI(rxdat_flits, 0x4),
+	CCN_EVENT_RNI(txdat_flits, 0x5),
+	CCN_EVENT_RNI(txreq_flits, 0x6),
+	CCN_EVENT_RNI(txreq_flits_retried, 0x7),
+	CCN_EVENT_RNI(rrt_full, 0x8),
+	CCN_EVENT_RNI(wrt_full, 0x9),
+	CCN_EVENT_RNI(txreq_flits_replayed, 0xa),
+	CCN_EVENT_XP(upload_starvation, 0x1),
+	CCN_EVENT_XP(download_starvation, 0x2),
+	CCN_EVENT_XP(respin, 0x3),
+	CCN_EVENT_XP(valid_flit, 0x4),
+	CCN_EVENT_XP(watchpoint, CCN_EVENT_WATCHPOINT),
+	CCN_EVENT_SBAS(rdata_beats_p0, 0x1),
+	CCN_EVENT_SBAS(rxdat_flits, 0x4),
+	CCN_EVENT_SBAS(txdat_flits, 0x5),
+	CCN_EVENT_SBAS(txreq_flits, 0x6),
+	CCN_EVENT_SBAS(txreq_flits_retried, 0x7),
+	CCN_EVENT_SBAS(rrt_full, 0x8),
+	CCN_EVENT_SBAS(wrt_full, 0x9),
+	CCN_EVENT_SBAS(txreq_flits_replayed, 0xa),
+	CCN_EVENT_CYCLES(cycles),
+};
+
+/* Populated in arm_ccn_init() */
+static struct attribute
+		*arm_ccn_pmu_events_attrs[ARRAY_SIZE(arm_ccn_pmu_events) + 1];
+
+static const struct attribute_group arm_ccn_pmu_events_attr_group = {
+	.name = "events",
+	.is_visible = arm_ccn_pmu_events_is_visible,
+	.attrs = arm_ccn_pmu_events_attrs,
+};
+
+
+static u64 *arm_ccn_pmu_get_cmp_mask(struct arm_ccn *ccn, const char *name)
+{
+	unsigned long i;
+
+	if (WARN_ON(!name || !name[0] || !isxdigit(name[0]) || !name[1]))
+		return NULL;
+	i = isdigit(name[0]) ? name[0] - '0' : 0xa + tolower(name[0]) - 'a';
+
+	switch (name[1]) {
+	case 'l':
+		return &ccn->dt.cmp_mask[i].l;
+	case 'h':
+		return &ccn->dt.cmp_mask[i].h;
+	default:
+		return NULL;
+	}
+}
+
+static ssize_t arm_ccn_pmu_cmp_mask_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
+	u64 *mask = arm_ccn_pmu_get_cmp_mask(ccn, attr->attr.name);
+
+	return mask ? sysfs_emit(buf, "0x%016llx\n", *mask) : -EINVAL;
+}
+
+static ssize_t arm_ccn_pmu_cmp_mask_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
+	u64 *mask = arm_ccn_pmu_get_cmp_mask(ccn, attr->attr.name);
+	int err = -EINVAL;
+
+	if (mask)
+		err = kstrtoull(buf, 0, mask);
+
+	return err ? err : count;
+}
+
+#define CCN_CMP_MASK_ATTR(_name) \
+	struct device_attribute arm_ccn_pmu_cmp_mask_attr_##_name = \
+			__ATTR(_name, S_IRUGO | S_IWUSR, \
+			arm_ccn_pmu_cmp_mask_show, arm_ccn_pmu_cmp_mask_store)
+
+#define CCN_CMP_MASK_ATTR_RO(_name) \
+	struct device_attribute arm_ccn_pmu_cmp_mask_attr_##_name = \
+			__ATTR(_name, S_IRUGO, arm_ccn_pmu_cmp_mask_show, NULL)
+
+static CCN_CMP_MASK_ATTR(0l);
+static CCN_CMP_MASK_ATTR(0h);
+static CCN_CMP_MASK_ATTR(1l);
+static CCN_CMP_MASK_ATTR(1h);
+static CCN_CMP_MASK_ATTR(2l);
+static CCN_CMP_MASK_ATTR(2h);
+static CCN_CMP_MASK_ATTR(3l);
+static CCN_CMP_MASK_ATTR(3h);
+static CCN_CMP_MASK_ATTR(4l);
+static CCN_CMP_MASK_ATTR(4h);
+static CCN_CMP_MASK_ATTR(5l);
+static CCN_CMP_MASK_ATTR(5h);
+static CCN_CMP_MASK_ATTR(6l);
+static CCN_CMP_MASK_ATTR(6h);
+static CCN_CMP_MASK_ATTR(7l);
+static CCN_CMP_MASK_ATTR(7h);
+static CCN_CMP_MASK_ATTR_RO(8l);
+static CCN_CMP_MASK_ATTR_RO(8h);
+static CCN_CMP_MASK_ATTR_RO(9l);
+static CCN_CMP_MASK_ATTR_RO(9h);
+static CCN_CMP_MASK_ATTR_RO(al);
+static CCN_CMP_MASK_ATTR_RO(ah);
+static CCN_CMP_MASK_ATTR_RO(bl);
+static CCN_CMP_MASK_ATTR_RO(bh);
+
+static struct attribute *arm_ccn_pmu_cmp_mask_attrs[] = {
+	&arm_ccn_pmu_cmp_mask_attr_0l.attr, &arm_ccn_pmu_cmp_mask_attr_0h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_1l.attr, &arm_ccn_pmu_cmp_mask_attr_1h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_2l.attr, &arm_ccn_pmu_cmp_mask_attr_2h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_3l.attr, &arm_ccn_pmu_cmp_mask_attr_3h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_4l.attr, &arm_ccn_pmu_cmp_mask_attr_4h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_5l.attr, &arm_ccn_pmu_cmp_mask_attr_5h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_6l.attr, &arm_ccn_pmu_cmp_mask_attr_6h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_7l.attr, &arm_ccn_pmu_cmp_mask_attr_7h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_8l.attr, &arm_ccn_pmu_cmp_mask_attr_8h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_9l.attr, &arm_ccn_pmu_cmp_mask_attr_9h.attr,
+	&arm_ccn_pmu_cmp_mask_attr_al.attr, &arm_ccn_pmu_cmp_mask_attr_ah.attr,
+	&arm_ccn_pmu_cmp_mask_attr_bl.attr, &arm_ccn_pmu_cmp_mask_attr_bh.attr,
+	NULL
+};
+
+static const struct attribute_group arm_ccn_pmu_cmp_mask_attr_group = {
+	.name = "cmp_mask",
+	.attrs = arm_ccn_pmu_cmp_mask_attrs,
+};
+
+static ssize_t arm_ccn_pmu_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(ccn->dt.cpu));
+}
+
+static struct device_attribute arm_ccn_pmu_cpumask_attr =
+		__ATTR(cpumask, S_IRUGO, arm_ccn_pmu_cpumask_show, NULL);
+
+static struct attribute *arm_ccn_pmu_cpumask_attrs[] = {
+	&arm_ccn_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group arm_ccn_pmu_cpumask_attr_group = {
+	.attrs = arm_ccn_pmu_cpumask_attrs,
+};
+
+/*
+ * Default poll period is 10ms, which is way over the top anyway,
+ * as in the worst case scenario (an event every cycle), with 1GHz
+ * clocked bus, the smallest, 32 bit counter will overflow in
+ * more than 4s.
+ */
+static unsigned int arm_ccn_pmu_poll_period_us = 10000;
+module_param_named(pmu_poll_period_us, arm_ccn_pmu_poll_period_us, uint,
+		S_IRUGO | S_IWUSR);
+
+static ktime_t arm_ccn_pmu_timer_period(void)
+{
+	return ns_to_ktime((u64)arm_ccn_pmu_poll_period_us * 1000);
+}
+
+
+static const struct attribute_group *arm_ccn_pmu_attr_groups[] = {
+	&arm_ccn_pmu_events_attr_group,
+	&arm_ccn_pmu_format_attr_group,
+	&arm_ccn_pmu_cmp_mask_attr_group,
+	&arm_ccn_pmu_cpumask_attr_group,
+	NULL
+};
+
+
+static int arm_ccn_pmu_alloc_bit(unsigned long *bitmap, unsigned long size)
+{
+	int bit;
+
+	do {
+		bit = find_first_zero_bit(bitmap, size);
+		if (bit >= size)
+			return -EAGAIN;
+	} while (test_and_set_bit(bit, bitmap));
+
+	return bit;
+}
+
+/* All RN-I and RN-D nodes have identical PMUs */
+static int arm_ccn_pmu_type_eq(u32 a, u32 b)
+{
+	if (a == b)
+		return 1;
+
+	switch (a) {
+	case CCN_TYPE_RNI_1P:
+	case CCN_TYPE_RNI_2P:
+	case CCN_TYPE_RNI_3P:
+	case CCN_TYPE_RND_1P:
+	case CCN_TYPE_RND_2P:
+	case CCN_TYPE_RND_3P:
+		switch (b) {
+		case CCN_TYPE_RNI_1P:
+		case CCN_TYPE_RNI_2P:
+		case CCN_TYPE_RNI_3P:
+		case CCN_TYPE_RND_1P:
+		case CCN_TYPE_RND_2P:
+		case CCN_TYPE_RND_3P:
+			return 1;
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static int arm_ccn_pmu_event_alloc(struct perf_event *event)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	u32 node_xp, type, event_id;
+	struct arm_ccn_component *source;
+	int bit;
+
+	node_xp = CCN_CONFIG_NODE(event->attr.config);
+	type = CCN_CONFIG_TYPE(event->attr.config);
+	event_id = CCN_CONFIG_EVENT(event->attr.config);
+
+	/* Allocate the cycle counter */
+	if (type == CCN_TYPE_CYCLES) {
+		if (test_and_set_bit(CCN_IDX_PMU_CYCLE_COUNTER,
+				ccn->dt.pmu_counters_mask))
+			return -EAGAIN;
+
+		hw->idx = CCN_IDX_PMU_CYCLE_COUNTER;
+		ccn->dt.pmu_counters[CCN_IDX_PMU_CYCLE_COUNTER].event = event;
+
+		return 0;
+	}
+
+	/* Allocate an event counter */
+	hw->idx = arm_ccn_pmu_alloc_bit(ccn->dt.pmu_counters_mask,
+			CCN_NUM_PMU_EVENT_COUNTERS);
+	if (hw->idx < 0) {
+		dev_dbg(ccn->dev, "No more counters available!\n");
+		return -EAGAIN;
+	}
+
+	if (type == CCN_TYPE_XP)
+		source = &ccn->xp[node_xp];
+	else
+		source = &ccn->node[node_xp];
+	ccn->dt.pmu_counters[hw->idx].source = source;
+
+	/* Allocate an event source or a watchpoint */
+	if (type == CCN_TYPE_XP && event_id == CCN_EVENT_WATCHPOINT)
+		bit = arm_ccn_pmu_alloc_bit(source->xp.dt_cmp_mask,
+				CCN_NUM_XP_WATCHPOINTS);
+	else
+		bit = arm_ccn_pmu_alloc_bit(source->pmu_events_mask,
+				CCN_NUM_PMU_EVENTS);
+	if (bit < 0) {
+		dev_dbg(ccn->dev, "No more event sources/watchpoints on node/XP %d!\n",
+				node_xp);
+		clear_bit(hw->idx, ccn->dt.pmu_counters_mask);
+		return -EAGAIN;
+	}
+	hw->config_base = bit;
+
+	ccn->dt.pmu_counters[hw->idx].event = event;
+
+	return 0;
+}
+
+static void arm_ccn_pmu_event_release(struct perf_event *event)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+
+	if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER) {
+		clear_bit(CCN_IDX_PMU_CYCLE_COUNTER, ccn->dt.pmu_counters_mask);
+	} else {
+		struct arm_ccn_component *source =
+				ccn->dt.pmu_counters[hw->idx].source;
+
+		if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP &&
+				CCN_CONFIG_EVENT(event->attr.config) ==
+				CCN_EVENT_WATCHPOINT)
+			clear_bit(hw->config_base, source->xp.dt_cmp_mask);
+		else
+			clear_bit(hw->config_base, source->pmu_events_mask);
+		clear_bit(hw->idx, ccn->dt.pmu_counters_mask);
+	}
+
+	ccn->dt.pmu_counters[hw->idx].source = NULL;
+	ccn->dt.pmu_counters[hw->idx].event = NULL;
+}
+
+static int arm_ccn_pmu_event_init(struct perf_event *event)
+{
+	struct arm_ccn *ccn;
+	struct hw_perf_event *hw = &event->hw;
+	u32 node_xp, type, event_id;
+	int valid;
+	int i;
+	struct perf_event *sibling;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	ccn = pmu_to_arm_ccn(event->pmu);
+
+	if (hw->sample_period) {
+		dev_dbg(ccn->dev, "Sampling not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (has_branch_stack(event)) {
+		dev_dbg(ccn->dev, "Can't exclude execution levels!\n");
+		return -EINVAL;
+	}
+
+	if (event->cpu < 0) {
+		dev_dbg(ccn->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+	/*
+	 * Many perf core operations (eg. events rotation) operate on a
+	 * single CPU context. This is obvious for CPU PMUs, where one
+	 * expects the same sets of events being observed on all CPUs,
+	 * but can lead to issues for off-core PMUs, like CCN, where each
+	 * event could be theoretically assigned to a different CPU. To
+	 * mitigate this, we enforce CPU assignment to one, selected
+	 * processor (the one described in the "cpumask" attribute).
+	 */
+	event->cpu = ccn->dt.cpu;
+
+	node_xp = CCN_CONFIG_NODE(event->attr.config);
+	type = CCN_CONFIG_TYPE(event->attr.config);
+	event_id = CCN_CONFIG_EVENT(event->attr.config);
+
+	/* Validate node/xp vs topology */
+	switch (type) {
+	case CCN_TYPE_MN:
+		if (node_xp != ccn->mn_id) {
+			dev_dbg(ccn->dev, "Invalid MN ID %d!\n", node_xp);
+			return -EINVAL;
+		}
+		break;
+	case CCN_TYPE_XP:
+		if (node_xp >= ccn->num_xps) {
+			dev_dbg(ccn->dev, "Invalid XP ID %d!\n", node_xp);
+			return -EINVAL;
+		}
+		break;
+	case CCN_TYPE_CYCLES:
+		break;
+	default:
+		if (node_xp >= ccn->num_nodes) {
+			dev_dbg(ccn->dev, "Invalid node ID %d!\n", node_xp);
+			return -EINVAL;
+		}
+		if (!arm_ccn_pmu_type_eq(type, ccn->node[node_xp].type)) {
+			dev_dbg(ccn->dev, "Invalid type 0x%x for node %d!\n",
+					type, node_xp);
+			return -EINVAL;
+		}
+		break;
+	}
+
+	/* Validate event ID vs available for the type */
+	for (i = 0, valid = 0; i < ARRAY_SIZE(arm_ccn_pmu_events) && !valid;
+			i++) {
+		struct arm_ccn_pmu_event *e = &arm_ccn_pmu_events[i];
+		u32 port = CCN_CONFIG_PORT(event->attr.config);
+		u32 vc = CCN_CONFIG_VC(event->attr.config);
+
+		if (!arm_ccn_pmu_type_eq(type, e->type))
+			continue;
+		if (event_id != e->event)
+			continue;
+		if (e->num_ports && port >= e->num_ports) {
+			dev_dbg(ccn->dev, "Invalid port %d for node/XP %d!\n",
+					port, node_xp);
+			return -EINVAL;
+		}
+		if (e->num_vcs && vc >= e->num_vcs) {
+			dev_dbg(ccn->dev, "Invalid vc %d for node/XP %d!\n",
+					vc, node_xp);
+			return -EINVAL;
+		}
+		valid = 1;
+	}
+	if (!valid) {
+		dev_dbg(ccn->dev, "Invalid event 0x%x for node/XP %d!\n",
+				event_id, node_xp);
+		return -EINVAL;
+	}
+
+	/* Watchpoint-based event for a node is actually set on XP */
+	if (event_id == CCN_EVENT_WATCHPOINT && type != CCN_TYPE_XP) {
+		u32 port;
+
+		type = CCN_TYPE_XP;
+		port = arm_ccn_node_to_xp_port(node_xp);
+		node_xp = arm_ccn_node_to_xp(node_xp);
+
+		arm_ccn_pmu_config_set(&event->attr.config,
+				node_xp, type, port);
+	}
+
+	/*
+	 * We must NOT create groups containing mixed PMUs, although software
+	 * events are acceptable (for example to create a CCN group
+	 * periodically read when a hrtimer aka cpu-clock leader triggers).
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static u64 arm_ccn_pmu_read_counter(struct arm_ccn *ccn, int idx)
+{
+	u64 res;
+
+	if (idx == CCN_IDX_PMU_CYCLE_COUNTER) {
+#ifdef readq
+		res = readq(ccn->dt.base + CCN_DT_PMCCNTR);
+#else
+		/* 40 bit counter, can do snapshot and read in two parts */
+		writel(0x1, ccn->dt.base + CCN_DT_PMSR_REQ);
+		while (!(readl(ccn->dt.base + CCN_DT_PMSR) & 0x1))
+			;
+		writel(0x1, ccn->dt.base + CCN_DT_PMSR_CLR);
+		res = readl(ccn->dt.base + CCN_DT_PMCCNTRSR + 4) & 0xff;
+		res <<= 32;
+		res |= readl(ccn->dt.base + CCN_DT_PMCCNTRSR);
+#endif
+	} else {
+		res = readl(ccn->dt.base + CCN_DT_PMEVCNT(idx));
+	}
+
+	return res;
+}
+
+static void arm_ccn_pmu_event_update(struct perf_event *event)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	u64 prev_count, new_count, mask;
+
+	do {
+		prev_count = local64_read(&hw->prev_count);
+		new_count = arm_ccn_pmu_read_counter(ccn, hw->idx);
+	} while (local64_xchg(&hw->prev_count, new_count) != prev_count);
+
+	mask = (1LLU << (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER ? 40 : 32)) - 1;
+
+	local64_add((new_count - prev_count) & mask, &event->count);
+}
+
+static void arm_ccn_pmu_xp_dt_config(struct perf_event *event, int enable)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	struct arm_ccn_component *xp;
+	u32 val, dt_cfg;
+
+	/* Nothing to do for cycle counter */
+	if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER)
+		return;
+
+	if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP)
+		xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)];
+	else
+		xp = &ccn->xp[arm_ccn_node_to_xp(
+				CCN_CONFIG_NODE(event->attr.config))];
+
+	if (enable)
+		dt_cfg = hw->event_base;
+	else
+		dt_cfg = CCN_XP_DT_CONFIG__DT_CFG__PASS_THROUGH;
+
+	spin_lock(&ccn->dt.config_lock);
+
+	val = readl(xp->base + CCN_XP_DT_CONFIG);
+	val &= ~(CCN_XP_DT_CONFIG__DT_CFG__MASK <<
+			CCN_XP_DT_CONFIG__DT_CFG__SHIFT(hw->idx));
+	val |= dt_cfg << CCN_XP_DT_CONFIG__DT_CFG__SHIFT(hw->idx);
+	writel(val, xp->base + CCN_XP_DT_CONFIG);
+
+	spin_unlock(&ccn->dt.config_lock);
+}
+
+static void arm_ccn_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+
+	local64_set(&event->hw.prev_count,
+			arm_ccn_pmu_read_counter(ccn, hw->idx));
+	hw->state = 0;
+
+	/* Set the DT bus input, engaging the counter */
+	arm_ccn_pmu_xp_dt_config(event, 1);
+}
+
+static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	/* Disable counting, setting the DT bus to pass-through mode */
+	arm_ccn_pmu_xp_dt_config(event, 0);
+
+	if (flags & PERF_EF_UPDATE)
+		arm_ccn_pmu_event_update(event);
+
+	hw->state |= PERF_HES_STOPPED;
+}
+
+static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	struct arm_ccn_component *source =
+			ccn->dt.pmu_counters[hw->idx].source;
+	unsigned long wp = hw->config_base;
+	u32 val;
+	u64 cmp_l = event->attr.config1;
+	u64 cmp_h = event->attr.config2;
+	u64 mask_l = ccn->dt.cmp_mask[CCN_CONFIG_MASK(event->attr.config)].l;
+	u64 mask_h = ccn->dt.cmp_mask[CCN_CONFIG_MASK(event->attr.config)].h;
+
+	hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__WATCHPOINT(wp);
+
+	/* Direction (RX/TX), device (port) & virtual channel */
+	val = readl(source->base + CCN_XP_DT_INTERFACE_SEL);
+	val &= ~(CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__MASK <<
+			CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__SHIFT(wp));
+	val |= CCN_CONFIG_DIR(event->attr.config) <<
+			CCN_XP_DT_INTERFACE_SEL__DT_IO_SEL__SHIFT(wp);
+	val &= ~(CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__MASK <<
+			CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__SHIFT(wp));
+	val |= CCN_CONFIG_PORT(event->attr.config) <<
+			CCN_XP_DT_INTERFACE_SEL__DT_DEV_SEL__SHIFT(wp);
+	val &= ~(CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__MASK <<
+			CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__SHIFT(wp));
+	val |= CCN_CONFIG_VC(event->attr.config) <<
+			CCN_XP_DT_INTERFACE_SEL__DT_VC_SEL__SHIFT(wp);
+	writel(val, source->base + CCN_XP_DT_INTERFACE_SEL);
+
+	/* Comparison values */
+	writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp));
+	writel((cmp_l >> 32) & 0x7fffffff,
+			source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4);
+	writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp));
+	writel((cmp_h >> 32) & 0x0fffffff,
+			source->base + CCN_XP_DT_CMP_VAL_H(wp) + 4);
+
+	/* Mask */
+	writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp));
+	writel((mask_l >> 32) & 0x7fffffff,
+			source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4);
+	writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp));
+	writel((mask_h >> 32) & 0x0fffffff,
+			source->base + CCN_XP_DT_CMP_MASK_H(wp) + 4);
+}
+
+static void arm_ccn_pmu_xp_event_config(struct perf_event *event)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	struct arm_ccn_component *source =
+			ccn->dt.pmu_counters[hw->idx].source;
+	u32 val, id;
+
+	hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(hw->config_base);
+
+	id = (CCN_CONFIG_VC(event->attr.config) << 4) |
+			(CCN_CONFIG_BUS(event->attr.config) << 3) |
+			(CCN_CONFIG_EVENT(event->attr.config) << 0);
+
+	val = readl(source->base + CCN_XP_PMU_EVENT_SEL);
+	val &= ~(CCN_XP_PMU_EVENT_SEL__ID__MASK <<
+			CCN_XP_PMU_EVENT_SEL__ID__SHIFT(hw->config_base));
+	val |= id << CCN_XP_PMU_EVENT_SEL__ID__SHIFT(hw->config_base);
+	writel(val, source->base + CCN_XP_PMU_EVENT_SEL);
+}
+
+static void arm_ccn_pmu_node_event_config(struct perf_event *event)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	struct arm_ccn_component *source =
+			ccn->dt.pmu_counters[hw->idx].source;
+	u32 type = CCN_CONFIG_TYPE(event->attr.config);
+	u32 val, port;
+
+	port = arm_ccn_node_to_xp_port(CCN_CONFIG_NODE(event->attr.config));
+	hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__DEVICE_PMU_EVENT(port,
+			hw->config_base);
+
+	/* These *_event_sel regs should be identical, but let's make sure... */
+	BUILD_BUG_ON(CCN_HNF_PMU_EVENT_SEL != CCN_SBAS_PMU_EVENT_SEL);
+	BUILD_BUG_ON(CCN_SBAS_PMU_EVENT_SEL != CCN_RNI_PMU_EVENT_SEL);
+	BUILD_BUG_ON(CCN_HNF_PMU_EVENT_SEL__ID__SHIFT(1) !=
+			CCN_SBAS_PMU_EVENT_SEL__ID__SHIFT(1));
+	BUILD_BUG_ON(CCN_SBAS_PMU_EVENT_SEL__ID__SHIFT(1) !=
+			CCN_RNI_PMU_EVENT_SEL__ID__SHIFT(1));
+	BUILD_BUG_ON(CCN_HNF_PMU_EVENT_SEL__ID__MASK !=
+			CCN_SBAS_PMU_EVENT_SEL__ID__MASK);
+	BUILD_BUG_ON(CCN_SBAS_PMU_EVENT_SEL__ID__MASK !=
+			CCN_RNI_PMU_EVENT_SEL__ID__MASK);
+	if (WARN_ON(type != CCN_TYPE_HNF && type != CCN_TYPE_SBAS &&
+			!arm_ccn_pmu_type_eq(type, CCN_TYPE_RNI_3P)))
+		return;
+
+	/* Set the event id for the pre-allocated counter */
+	val = readl(source->base + CCN_HNF_PMU_EVENT_SEL);
+	val &= ~(CCN_HNF_PMU_EVENT_SEL__ID__MASK <<
+		CCN_HNF_PMU_EVENT_SEL__ID__SHIFT(hw->config_base));
+	val |= CCN_CONFIG_EVENT(event->attr.config) <<
+		CCN_HNF_PMU_EVENT_SEL__ID__SHIFT(hw->config_base);
+	writel(val, source->base + CCN_HNF_PMU_EVENT_SEL);
+}
+
+static void arm_ccn_pmu_event_config(struct perf_event *event)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	u32 xp, offset, val;
+
+	/* Cycle counter requires no setup */
+	if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER)
+		return;
+
+	if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP)
+		xp = CCN_CONFIG_XP(event->attr.config);
+	else
+		xp = arm_ccn_node_to_xp(CCN_CONFIG_NODE(event->attr.config));
+
+	spin_lock(&ccn->dt.config_lock);
+
+	/* Set the DT bus "distance" register */
+	offset = (hw->idx / 4) * 4;
+	val = readl(ccn->dt.base + CCN_DT_ACTIVE_DSM + offset);
+	val &= ~(CCN_DT_ACTIVE_DSM__DSM_ID__MASK <<
+			CCN_DT_ACTIVE_DSM__DSM_ID__SHIFT(hw->idx % 4));
+	val |= xp << CCN_DT_ACTIVE_DSM__DSM_ID__SHIFT(hw->idx % 4);
+	writel(val, ccn->dt.base + CCN_DT_ACTIVE_DSM + offset);
+
+	if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP) {
+		if (CCN_CONFIG_EVENT(event->attr.config) ==
+				CCN_EVENT_WATCHPOINT)
+			arm_ccn_pmu_xp_watchpoint_config(event);
+		else
+			arm_ccn_pmu_xp_event_config(event);
+	} else {
+		arm_ccn_pmu_node_event_config(event);
+	}
+
+	spin_unlock(&ccn->dt.config_lock);
+}
+
+static int arm_ccn_pmu_active_counters(struct arm_ccn *ccn)
+{
+	return bitmap_weight(ccn->dt.pmu_counters_mask,
+			     CCN_NUM_PMU_EVENT_COUNTERS + 1);
+}
+
+static int arm_ccn_pmu_event_add(struct perf_event *event, int flags)
+{
+	int err;
+	struct hw_perf_event *hw = &event->hw;
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+
+	err = arm_ccn_pmu_event_alloc(event);
+	if (err)
+		return err;
+
+	/*
+	 * Pin the timer, so that the overflows are handled by the chosen
+	 * event->cpu (this is the same one as presented in "cpumask"
+	 * attribute).
+	 */
+	if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 1)
+		hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
+			      HRTIMER_MODE_REL_PINNED);
+
+	arm_ccn_pmu_event_config(event);
+
+	hw->state = PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		arm_ccn_pmu_event_start(event, PERF_EF_UPDATE);
+
+	return 0;
+}
+
+static void arm_ccn_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+
+	arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE);
+
+	arm_ccn_pmu_event_release(event);
+
+	if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 0)
+		hrtimer_cancel(&ccn->dt.hrtimer);
+}
+
+static void arm_ccn_pmu_event_read(struct perf_event *event)
+{
+	arm_ccn_pmu_event_update(event);
+}
+
+static void arm_ccn_pmu_enable(struct pmu *pmu)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
+
+	u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
+	val |= CCN_DT_PMCR__PMU_EN;
+	writel(val, ccn->dt.base + CCN_DT_PMCR);
+}
+
+static void arm_ccn_pmu_disable(struct pmu *pmu)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
+
+	u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
+	val &= ~CCN_DT_PMCR__PMU_EN;
+	writel(val, ccn->dt.base + CCN_DT_PMCR);
+}
+
+static irqreturn_t arm_ccn_pmu_overflow_handler(struct arm_ccn_dt *dt)
+{
+	u32 pmovsr = readl(dt->base + CCN_DT_PMOVSR);
+	int idx;
+
+	if (!pmovsr)
+		return IRQ_NONE;
+
+	writel(pmovsr, dt->base + CCN_DT_PMOVSR_CLR);
+
+	BUILD_BUG_ON(CCN_IDX_PMU_CYCLE_COUNTER != CCN_NUM_PMU_EVENT_COUNTERS);
+
+	for (idx = 0; idx < CCN_NUM_PMU_EVENT_COUNTERS + 1; idx++) {
+		struct perf_event *event = dt->pmu_counters[idx].event;
+		int overflowed = pmovsr & BIT(idx);
+
+		WARN_ON_ONCE(overflowed && !event &&
+				idx != CCN_IDX_PMU_CYCLE_COUNTER);
+
+		if (!event || !overflowed)
+			continue;
+
+		arm_ccn_pmu_event_update(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart arm_ccn_pmu_timer_handler(struct hrtimer *hrtimer)
+{
+	struct arm_ccn_dt *dt = container_of(hrtimer, struct arm_ccn_dt,
+			hrtimer);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	arm_ccn_pmu_overflow_handler(dt);
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, arm_ccn_pmu_timer_period());
+	return HRTIMER_RESTART;
+}
+
+
+static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_ccn_dt *dt = hlist_entry_safe(node, struct arm_ccn_dt, node);
+	struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
+	unsigned int target;
+
+	if (cpu != dt->cpu)
+		return 0;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+	perf_pmu_migrate_context(&dt->pmu, cpu, target);
+	dt->cpu = target;
+	if (ccn->irq)
+		WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
+	return 0;
+}
+
+static DEFINE_IDA(arm_ccn_pmu_ida);
+
+static int arm_ccn_pmu_init(struct arm_ccn *ccn)
+{
+	int i;
+	char *name;
+	int err;
+
+	/* Initialize DT subsystem */
+	ccn->dt.base = ccn->base + CCN_REGION_SIZE;
+	spin_lock_init(&ccn->dt.config_lock);
+	writel(CCN_DT_PMOVSR_CLR__MASK, ccn->dt.base + CCN_DT_PMOVSR_CLR);
+	writel(CCN_DT_CTL__DT_EN, ccn->dt.base + CCN_DT_CTL);
+	writel(CCN_DT_PMCR__OVFL_INTR_EN | CCN_DT_PMCR__PMU_EN,
+			ccn->dt.base + CCN_DT_PMCR);
+	writel(0x1, ccn->dt.base + CCN_DT_PMSR_CLR);
+	for (i = 0; i < ccn->num_xps; i++) {
+		writel(0, ccn->xp[i].base + CCN_XP_DT_CONFIG);
+		writel((CCN_XP_DT_CONTROL__WP_ARM_SEL__ALWAYS <<
+				CCN_XP_DT_CONTROL__WP_ARM_SEL__SHIFT(0)) |
+				(CCN_XP_DT_CONTROL__WP_ARM_SEL__ALWAYS <<
+				CCN_XP_DT_CONTROL__WP_ARM_SEL__SHIFT(1)) |
+				CCN_XP_DT_CONTROL__DT_ENABLE,
+				ccn->xp[i].base + CCN_XP_DT_CONTROL);
+	}
+	ccn->dt.cmp_mask[CCN_IDX_MASK_ANY].l = ~0;
+	ccn->dt.cmp_mask[CCN_IDX_MASK_ANY].h = ~0;
+	ccn->dt.cmp_mask[CCN_IDX_MASK_EXACT].l = 0;
+	ccn->dt.cmp_mask[CCN_IDX_MASK_EXACT].h = 0;
+	ccn->dt.cmp_mask[CCN_IDX_MASK_ORDER].l = ~0;
+	ccn->dt.cmp_mask[CCN_IDX_MASK_ORDER].h = ~(0x1 << 15);
+	ccn->dt.cmp_mask[CCN_IDX_MASK_OPCODE].l = ~0;
+	ccn->dt.cmp_mask[CCN_IDX_MASK_OPCODE].h = ~(0x1f << 9);
+
+	/* Get a convenient /sys/event_source/devices/ name */
+	ccn->dt.id = ida_alloc(&arm_ccn_pmu_ida, GFP_KERNEL);
+	if (ccn->dt.id == 0) {
+		name = "ccn";
+	} else {
+		name = devm_kasprintf(ccn->dev, GFP_KERNEL, "ccn_%d",
+				      ccn->dt.id);
+		if (!name) {
+			err = -ENOMEM;
+			goto error_choose_name;
+		}
+	}
+
+	/* Perf driver registration */
+	ccn->dt.pmu = (struct pmu) {
+		.module = THIS_MODULE,
+		.attr_groups = arm_ccn_pmu_attr_groups,
+		.task_ctx_nr = perf_invalid_context,
+		.event_init = arm_ccn_pmu_event_init,
+		.add = arm_ccn_pmu_event_add,
+		.del = arm_ccn_pmu_event_del,
+		.start = arm_ccn_pmu_event_start,
+		.stop = arm_ccn_pmu_event_stop,
+		.read = arm_ccn_pmu_event_read,
+		.pmu_enable = arm_ccn_pmu_enable,
+		.pmu_disable = arm_ccn_pmu_disable,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	/* No overflow interrupt? Have to use a timer instead. */
+	if (!ccn->irq) {
+		dev_info(ccn->dev, "No access to interrupts, using timer.\n");
+		hrtimer_init(&ccn->dt.hrtimer, CLOCK_MONOTONIC,
+				HRTIMER_MODE_REL);
+		ccn->dt.hrtimer.function = arm_ccn_pmu_timer_handler;
+	}
+
+	/* Pick one CPU which we will use to collect data from CCN... */
+	ccn->dt.cpu = raw_smp_processor_id();
+
+	/* Also make sure that the overflow interrupt is handled by this CPU */
+	if (ccn->irq) {
+		err = irq_set_affinity(ccn->irq, cpumask_of(ccn->dt.cpu));
+		if (err) {
+			dev_err(ccn->dev, "Failed to set interrupt affinity!\n");
+			goto error_set_affinity;
+		}
+	}
+
+	cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+					 &ccn->dt.node);
+
+	err = perf_pmu_register(&ccn->dt.pmu, name, -1);
+	if (err)
+		goto error_pmu_register;
+
+	return 0;
+
+error_pmu_register:
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+					    &ccn->dt.node);
+error_set_affinity:
+error_choose_name:
+	ida_free(&arm_ccn_pmu_ida, ccn->dt.id);
+	for (i = 0; i < ccn->num_xps; i++)
+		writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
+	writel(0, ccn->dt.base + CCN_DT_PMCR);
+	return err;
+}
+
+static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn)
+{
+	int i;
+
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+					    &ccn->dt.node);
+	for (i = 0; i < ccn->num_xps; i++)
+		writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
+	writel(0, ccn->dt.base + CCN_DT_PMCR);
+	perf_pmu_unregister(&ccn->dt.pmu);
+	ida_free(&arm_ccn_pmu_ida, ccn->dt.id);
+}
+
+static int arm_ccn_for_each_valid_region(struct arm_ccn *ccn,
+		int (*callback)(struct arm_ccn *ccn, int region,
+		void __iomem *base, u32 type, u32 id))
+{
+	int region;
+
+	for (region = 0; region < CCN_NUM_REGIONS; region++) {
+		u32 val, type, id;
+		void __iomem *base;
+		int err;
+
+		val = readl(ccn->base + CCN_MN_OLY_COMP_LIST_63_0 +
+				4 * (region / 32));
+		if (!(val & (1 << (region % 32))))
+			continue;
+
+		base = ccn->base + region * CCN_REGION_SIZE;
+		val = readl(base + CCN_ALL_OLY_ID);
+		type = (val >> CCN_ALL_OLY_ID__OLY_ID__SHIFT) &
+				CCN_ALL_OLY_ID__OLY_ID__MASK;
+		id = (val >> CCN_ALL_OLY_ID__NODE_ID__SHIFT) &
+				CCN_ALL_OLY_ID__NODE_ID__MASK;
+
+		err = callback(ccn, region, base, type, id);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int arm_ccn_get_nodes_num(struct arm_ccn *ccn, int region,
+		void __iomem *base, u32 type, u32 id)
+{
+
+	if (type == CCN_TYPE_XP && id >= ccn->num_xps)
+		ccn->num_xps = id + 1;
+	else if (id >= ccn->num_nodes)
+		ccn->num_nodes = id + 1;
+
+	return 0;
+}
+
+static int arm_ccn_init_nodes(struct arm_ccn *ccn, int region,
+		void __iomem *base, u32 type, u32 id)
+{
+	struct arm_ccn_component *component;
+
+	dev_dbg(ccn->dev, "Region %d: id=%u, type=0x%02x\n", region, id, type);
+
+	switch (type) {
+	case CCN_TYPE_MN:
+		ccn->mn_id = id;
+		return 0;
+	case CCN_TYPE_DT:
+		return 0;
+	case CCN_TYPE_XP:
+		component = &ccn->xp[id];
+		break;
+	case CCN_TYPE_SBSX:
+		ccn->sbsx_present = 1;
+		component = &ccn->node[id];
+		break;
+	case CCN_TYPE_SBAS:
+		ccn->sbas_present = 1;
+		fallthrough;
+	default:
+		component = &ccn->node[id];
+		break;
+	}
+
+	component->base = base;
+	component->type = type;
+
+	return 0;
+}
+
+
+static irqreturn_t arm_ccn_error_handler(struct arm_ccn *ccn,
+		const u32 *err_sig_val)
+{
+	/* This should be really handled by firmware... */
+	dev_err(ccn->dev, "Error reported in %08x%08x%08x%08x%08x%08x.\n",
+			err_sig_val[5], err_sig_val[4], err_sig_val[3],
+			err_sig_val[2], err_sig_val[1], err_sig_val[0]);
+	dev_err(ccn->dev, "Disabling interrupt generation for all errors.\n");
+	writel(CCN_MN_ERRINT_STATUS__ALL_ERRORS__DISABLE,
+			ccn->base + CCN_MN_ERRINT_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+
+static irqreturn_t arm_ccn_irq_handler(int irq, void *dev_id)
+{
+	irqreturn_t res = IRQ_NONE;
+	struct arm_ccn *ccn = dev_id;
+	u32 err_sig_val[6];
+	u32 err_or;
+	int i;
+
+	/* PMU overflow is a special case */
+	err_or = err_sig_val[0] = readl(ccn->base + CCN_MN_ERR_SIG_VAL_63_0);
+	if (err_or & CCN_MN_ERR_SIG_VAL_63_0__DT) {
+		err_or &= ~CCN_MN_ERR_SIG_VAL_63_0__DT;
+		res = arm_ccn_pmu_overflow_handler(&ccn->dt);
+	}
+
+	/* Have to read all err_sig_vals to clear them */
+	for (i = 1; i < ARRAY_SIZE(err_sig_val); i++) {
+		err_sig_val[i] = readl(ccn->base +
+				CCN_MN_ERR_SIG_VAL_63_0 + i * 4);
+		err_or |= err_sig_val[i];
+	}
+	if (err_or)
+		res |= arm_ccn_error_handler(ccn, err_sig_val);
+
+	if (res != IRQ_NONE)
+		writel(CCN_MN_ERRINT_STATUS__INTREQ__DESSERT,
+				ccn->base + CCN_MN_ERRINT_STATUS);
+
+	return res;
+}
+
+
+static int arm_ccn_probe(struct platform_device *pdev)
+{
+	struct arm_ccn *ccn;
+	int irq;
+	int err;
+
+	ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL);
+	if (!ccn)
+		return -ENOMEM;
+	ccn->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ccn);
+
+	ccn->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ccn->base))
+		return PTR_ERR(ccn->base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	/* Check if we can use the interrupt */
+	writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE,
+			ccn->base + CCN_MN_ERRINT_STATUS);
+	if (readl(ccn->base + CCN_MN_ERRINT_STATUS) &
+			CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLED) {
+		/* Can set 'disable' bits, so can acknowledge interrupts */
+		writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE,
+				ccn->base + CCN_MN_ERRINT_STATUS);
+		err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler,
+				       IRQF_NOBALANCING | IRQF_NO_THREAD,
+				       dev_name(ccn->dev), ccn);
+		if (err)
+			return err;
+
+		ccn->irq = irq;
+	}
+
+
+	/* Build topology */
+
+	err = arm_ccn_for_each_valid_region(ccn, arm_ccn_get_nodes_num);
+	if (err)
+		return err;
+
+	ccn->node = devm_kcalloc(ccn->dev, ccn->num_nodes, sizeof(*ccn->node),
+				 GFP_KERNEL);
+	ccn->xp = devm_kcalloc(ccn->dev, ccn->num_xps, sizeof(*ccn->node),
+			       GFP_KERNEL);
+	if (!ccn->node || !ccn->xp)
+		return -ENOMEM;
+
+	err = arm_ccn_for_each_valid_region(ccn, arm_ccn_init_nodes);
+	if (err)
+		return err;
+
+	return arm_ccn_pmu_init(ccn);
+}
+
+static int arm_ccn_remove(struct platform_device *pdev)
+{
+	struct arm_ccn *ccn = platform_get_drvdata(pdev);
+
+	arm_ccn_pmu_cleanup(ccn);
+
+	return 0;
+}
+
+static const struct of_device_id arm_ccn_match[] = {
+	{ .compatible = "arm,ccn-502", },
+	{ .compatible = "arm,ccn-504", },
+	{ .compatible = "arm,ccn-512", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, arm_ccn_match);
+
+static struct platform_driver arm_ccn_driver = {
+	.driver = {
+		.name = "arm-ccn",
+		.of_match_table = arm_ccn_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = arm_ccn_probe,
+	.remove = arm_ccn_remove,
+};
+
+static int __init arm_ccn_init(void)
+{
+	int i, ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+				      "perf/arm/ccn:online", NULL,
+				      arm_ccn_pmu_offline_cpu);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(arm_ccn_pmu_events); i++)
+		arm_ccn_pmu_events_attrs[i] = &arm_ccn_pmu_events[i].attr.attr;
+
+	ret = platform_driver_register(&arm_ccn_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
+	return ret;
+}
+
+static void __exit arm_ccn_exit(void)
+{
+	platform_driver_unregister(&arm_ccn_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
+}
+
+module_init(arm_ccn_init);
+module_exit(arm_ccn_exit);
+
+MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
new file mode 100644
index 0000000000..6404b17d3a
--- /dev/null
+++ b/drivers/perf/arm-cmn.c
@@ -0,0 +1,2580 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2016-2020 Arm Limited
+// CMN-600 Coherent Mesh Network PMU driver
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+
+/* Common register stuff */
+#define CMN_NODE_INFO			0x0000
+#define CMN_NI_NODE_TYPE		GENMASK_ULL(15, 0)
+#define CMN_NI_NODE_ID			GENMASK_ULL(31, 16)
+#define CMN_NI_LOGICAL_ID		GENMASK_ULL(47, 32)
+
+#define CMN_NODEID_DEVID(reg)		((reg) & 3)
+#define CMN_NODEID_EXT_DEVID(reg)	((reg) & 1)
+#define CMN_NODEID_PID(reg)		(((reg) >> 2) & 1)
+#define CMN_NODEID_EXT_PID(reg)		(((reg) >> 1) & 3)
+#define CMN_NODEID_1x1_PID(reg)		(((reg) >> 2) & 7)
+#define CMN_NODEID_X(reg, bits)		((reg) >> (3 + (bits)))
+#define CMN_NODEID_Y(reg, bits)		(((reg) >> 3) & ((1U << (bits)) - 1))
+
+#define CMN_CHILD_INFO			0x0080
+#define CMN_CI_CHILD_COUNT		GENMASK_ULL(15, 0)
+#define CMN_CI_CHILD_PTR_OFFSET		GENMASK_ULL(31, 16)
+
+#define CMN_CHILD_NODE_ADDR		GENMASK(29, 0)
+#define CMN_CHILD_NODE_EXTERNAL		BIT(31)
+
+#define CMN_MAX_DIMENSION		12
+#define CMN_MAX_XPS			(CMN_MAX_DIMENSION * CMN_MAX_DIMENSION)
+#define CMN_MAX_DTMS			(CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4)
+
+/* The CFG node has various info besides the discovery tree */
+#define CMN_CFGM_PERIPH_ID_01		0x0008
+#define CMN_CFGM_PID0_PART_0		GENMASK_ULL(7, 0)
+#define CMN_CFGM_PID1_PART_1		GENMASK_ULL(35, 32)
+#define CMN_CFGM_PERIPH_ID_23		0x0010
+#define CMN_CFGM_PID2_REVISION		GENMASK_ULL(7, 4)
+
+#define CMN_CFGM_INFO_GLOBAL		0x900
+#define CMN_INFO_MULTIPLE_DTM_EN	BIT_ULL(63)
+#define CMN_INFO_RSP_VC_NUM		GENMASK_ULL(53, 52)
+#define CMN_INFO_DAT_VC_NUM		GENMASK_ULL(51, 50)
+
+#define CMN_CFGM_INFO_GLOBAL_1		0x908
+#define CMN_INFO_SNP_VC_NUM		GENMASK_ULL(3, 2)
+#define CMN_INFO_REQ_VC_NUM		GENMASK_ULL(1, 0)
+
+/* XPs also have some local topology info which has uses too */
+#define CMN_MXP__CONNECT_INFO(p)	(0x0008 + 8 * (p))
+#define CMN__CONNECT_INFO_DEVICE_TYPE	GENMASK_ULL(4, 0)
+
+#define CMN_MAX_PORTS			6
+#define CI700_CONNECT_INFO_P2_5_OFFSET	0x10
+
+/* PMU registers occupy the 3rd 4KB page of each node's region */
+#define CMN_PMU_OFFSET			0x2000
+
+/* For most nodes, this is all there is */
+#define CMN_PMU_EVENT_SEL		0x000
+#define CMN__PMU_CBUSY_SNTHROTTLE_SEL	GENMASK_ULL(44, 42)
+#define CMN__PMU_SN_HOME_SEL		GENMASK_ULL(40, 39)
+#define CMN__PMU_HBT_LBT_SEL		GENMASK_ULL(38, 37)
+#define CMN__PMU_CLASS_OCCUP_ID		GENMASK_ULL(36, 35)
+/* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */
+#define CMN__PMU_OCCUP1_ID		GENMASK_ULL(34, 32)
+
+/* HN-Ps are weird... */
+#define CMN_HNP_PMU_EVENT_SEL		0x008
+
+/* DTMs live in the PMU space of XP registers */
+#define CMN_DTM_WPn(n)			(0x1A0 + (n) * 0x18)
+#define CMN_DTM_WPn_CONFIG(n)		(CMN_DTM_WPn(n) + 0x00)
+#define CMN_DTM_WPn_CONFIG_WP_CHN_NUM	GENMASK_ULL(20, 19)
+#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2	GENMASK_ULL(18, 17)
+#define CMN_DTM_WPn_CONFIG_WP_COMBINE	BIT(9)
+#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE	BIT(8)
+#define CMN600_WPn_CONFIG_WP_COMBINE	BIT(6)
+#define CMN600_WPn_CONFIG_WP_EXCLUSIVE	BIT(5)
+#define CMN_DTM_WPn_CONFIG_WP_GRP	GENMASK_ULL(5, 4)
+#define CMN_DTM_WPn_CONFIG_WP_CHN_SEL	GENMASK_ULL(3, 1)
+#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL	BIT(0)
+#define CMN_DTM_WPn_VAL(n)		(CMN_DTM_WPn(n) + 0x08)
+#define CMN_DTM_WPn_MASK(n)		(CMN_DTM_WPn(n) + 0x10)
+
+#define CMN_DTM_PMU_CONFIG		0x210
+#define CMN__PMEVCNT0_INPUT_SEL		GENMASK_ULL(37, 32)
+#define CMN__PMEVCNT0_INPUT_SEL_WP	0x00
+#define CMN__PMEVCNT0_INPUT_SEL_XP	0x04
+#define CMN__PMEVCNT0_INPUT_SEL_DEV	0x10
+#define CMN__PMEVCNT0_GLOBAL_NUM	GENMASK_ULL(18, 16)
+#define CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(n)	((n) * 4)
+#define CMN__PMEVCNT_PAIRED(n)		BIT(4 + (n))
+#define CMN__PMEVCNT23_COMBINED		BIT(2)
+#define CMN__PMEVCNT01_COMBINED		BIT(1)
+#define CMN_DTM_PMU_CONFIG_PMU_EN	BIT(0)
+
+#define CMN_DTM_PMEVCNT			0x220
+
+#define CMN_DTM_PMEVCNTSR		0x240
+
+#define CMN650_DTM_UNIT_INFO		0x0910
+#define CMN_DTM_UNIT_INFO		0x0960
+#define CMN_DTM_UNIT_INFO_DTC_DOMAIN	GENMASK_ULL(1, 0)
+
+#define CMN_DTM_NUM_COUNTERS		4
+/* Want more local counters? Why not replicate the whole DTM! Ugh... */
+#define CMN_DTM_OFFSET(n)		((n) * 0x200)
+
+/* The DTC node is where the magic happens */
+#define CMN_DT_DTC_CTL			0x0a00
+#define CMN_DT_DTC_CTL_DT_EN		BIT(0)
+
+/* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
+#define _CMN_DT_CNT_REG(n)		((((n) / 2) * 4 + (n) % 2) * 4)
+#define CMN_DT_PMEVCNT(n)		(CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTR			(CMN_PMU_OFFSET + 0x40)
+
+#define CMN_DT_PMEVCNTSR(n)		(CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTRSR		(CMN_PMU_OFFSET + 0x90)
+
+#define CMN_DT_PMCR			(CMN_PMU_OFFSET + 0x100)
+#define CMN_DT_PMCR_PMU_EN		BIT(0)
+#define CMN_DT_PMCR_CNTR_RST		BIT(5)
+#define CMN_DT_PMCR_OVFL_INTR_EN	BIT(6)
+
+#define CMN_DT_PMOVSR			(CMN_PMU_OFFSET + 0x118)
+#define CMN_DT_PMOVSR_CLR		(CMN_PMU_OFFSET + 0x120)
+
+#define CMN_DT_PMSSR			(CMN_PMU_OFFSET + 0x128)
+#define CMN_DT_PMSSR_SS_STATUS(n)	BIT(n)
+
+#define CMN_DT_PMSRR			(CMN_PMU_OFFSET + 0x130)
+#define CMN_DT_PMSRR_SS_REQ		BIT(0)
+
+#define CMN_DT_NUM_COUNTERS		8
+#define CMN_MAX_DTCS			4
+
+/*
+ * Even in the worst case a DTC counter can't wrap in fewer than 2^42 cycles,
+ * so throwing away one bit to make overflow handling easy is no big deal.
+ */
+#define CMN_COUNTER_INIT		0x80000000
+/* Similarly for the 40-bit cycle counter */
+#define CMN_CC_INIT			0x8000000000ULL
+
+
+/* Event attributes */
+#define CMN_CONFIG_TYPE			GENMASK_ULL(15, 0)
+#define CMN_CONFIG_EVENTID		GENMASK_ULL(26, 16)
+#define CMN_CONFIG_OCCUPID		GENMASK_ULL(30, 27)
+#define CMN_CONFIG_BYNODEID		BIT_ULL(31)
+#define CMN_CONFIG_NODEID		GENMASK_ULL(47, 32)
+
+#define CMN_EVENT_TYPE(event)		FIELD_GET(CMN_CONFIG_TYPE, (event)->attr.config)
+#define CMN_EVENT_EVENTID(event)	FIELD_GET(CMN_CONFIG_EVENTID, (event)->attr.config)
+#define CMN_EVENT_OCCUPID(event)	FIELD_GET(CMN_CONFIG_OCCUPID, (event)->attr.config)
+#define CMN_EVENT_BYNODEID(event)	FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config)
+#define CMN_EVENT_NODEID(event)		FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config)
+
+#define CMN_CONFIG_WP_COMBINE		GENMASK_ULL(30, 27)
+#define CMN_CONFIG_WP_DEV_SEL		GENMASK_ULL(50, 48)
+#define CMN_CONFIG_WP_CHN_SEL		GENMASK_ULL(55, 51)
+/* Note that we don't yet support the tertiary match group on newer IPs */
+#define CMN_CONFIG_WP_GRP		BIT_ULL(56)
+#define CMN_CONFIG_WP_EXCLUSIVE		BIT_ULL(57)
+#define CMN_CONFIG1_WP_VAL		GENMASK_ULL(63, 0)
+#define CMN_CONFIG2_WP_MASK		GENMASK_ULL(63, 0)
+
+#define CMN_EVENT_WP_COMBINE(event)	FIELD_GET(CMN_CONFIG_WP_COMBINE, (event)->attr.config)
+#define CMN_EVENT_WP_DEV_SEL(event)	FIELD_GET(CMN_CONFIG_WP_DEV_SEL, (event)->attr.config)
+#define CMN_EVENT_WP_CHN_SEL(event)	FIELD_GET(CMN_CONFIG_WP_CHN_SEL, (event)->attr.config)
+#define CMN_EVENT_WP_GRP(event)		FIELD_GET(CMN_CONFIG_WP_GRP, (event)->attr.config)
+#define CMN_EVENT_WP_EXCLUSIVE(event)	FIELD_GET(CMN_CONFIG_WP_EXCLUSIVE, (event)->attr.config)
+#define CMN_EVENT_WP_VAL(event)		FIELD_GET(CMN_CONFIG1_WP_VAL, (event)->attr.config1)
+#define CMN_EVENT_WP_MASK(event)	FIELD_GET(CMN_CONFIG2_WP_MASK, (event)->attr.config2)
+
+/* Made-up event IDs for watchpoint direction */
+#define CMN_WP_UP			0
+#define CMN_WP_DOWN			2
+
+
+/* Internal values for encoding event support */
+enum cmn_model {
+	CMN600 = 1,
+	CMN650 = 2,
+	CMN700 = 4,
+	CI700 = 8,
+	/* ...and then we can use bitmap tricks for commonality */
+	CMN_ANY = -1,
+	NOT_CMN600 = -2,
+	CMN_650ON = CMN650 | CMN700,
+};
+
+/* Actual part numbers and revision IDs defined by the hardware */
+enum cmn_part {
+	PART_CMN600 = 0x434,
+	PART_CMN650 = 0x436,
+	PART_CMN700 = 0x43c,
+	PART_CI700 = 0x43a,
+};
+
+/* CMN-600 r0px shouldn't exist in silicon, thankfully */
+enum cmn_revision {
+	REV_CMN600_R1P0,
+	REV_CMN600_R1P1,
+	REV_CMN600_R1P2,
+	REV_CMN600_R1P3,
+	REV_CMN600_R2P0,
+	REV_CMN600_R3P0,
+	REV_CMN600_R3P1,
+	REV_CMN650_R0P0 = 0,
+	REV_CMN650_R1P0,
+	REV_CMN650_R1P1,
+	REV_CMN650_R2P0,
+	REV_CMN650_R1P2,
+	REV_CMN700_R0P0 = 0,
+	REV_CMN700_R1P0,
+	REV_CMN700_R2P0,
+	REV_CMN700_R3P0,
+	REV_CI700_R0P0 = 0,
+	REV_CI700_R1P0,
+	REV_CI700_R2P0,
+};
+
+enum cmn_node_type {
+	CMN_TYPE_INVALID,
+	CMN_TYPE_DVM,
+	CMN_TYPE_CFG,
+	CMN_TYPE_DTC,
+	CMN_TYPE_HNI,
+	CMN_TYPE_HNF,
+	CMN_TYPE_XP,
+	CMN_TYPE_SBSX,
+	CMN_TYPE_MPAM_S,
+	CMN_TYPE_MPAM_NS,
+	CMN_TYPE_RNI,
+	CMN_TYPE_RND = 0xd,
+	CMN_TYPE_RNSAM = 0xf,
+	CMN_TYPE_MTSX,
+	CMN_TYPE_HNP,
+	CMN_TYPE_CXRA = 0x100,
+	CMN_TYPE_CXHA,
+	CMN_TYPE_CXLA,
+	CMN_TYPE_CCRA,
+	CMN_TYPE_CCHA,
+	CMN_TYPE_CCLA,
+	CMN_TYPE_CCLA_RNI,
+	CMN_TYPE_HNS = 0x200,
+	CMN_TYPE_HNS_MPAM_S,
+	CMN_TYPE_HNS_MPAM_NS,
+	/* Not a real node type */
+	CMN_TYPE_WP = 0x7770
+};
+
+enum cmn_filter_select {
+	SEL_NONE = -1,
+	SEL_OCCUP1ID,
+	SEL_CLASS_OCCUP_ID,
+	SEL_CBUSY_SNTHROTTLE_SEL,
+	SEL_HBT_LBT_SEL,
+	SEL_SN_HOME_SEL,
+	SEL_MAX
+};
+
+struct arm_cmn_node {
+	void __iomem *pmu_base;
+	u16 id, logid;
+	enum cmn_node_type type;
+
+	int dtm;
+	union {
+		/* DN/HN-F/CXHA */
+		struct {
+			u8 val : 4;
+			u8 count : 4;
+		} occupid[SEL_MAX];
+		/* XP */
+		u8 dtc;
+	};
+	union {
+		u8 event[4];
+		__le32 event_sel;
+		u16 event_w[4];
+		__le64 event_sel_w;
+	};
+};
+
+struct arm_cmn_dtm {
+	void __iomem *base;
+	u32 pmu_config_low;
+	union {
+		u8 input_sel[4];
+		__le32 pmu_config_high;
+	};
+	s8 wp_event[4];
+};
+
+struct arm_cmn_dtc {
+	void __iomem *base;
+	int irq;
+	int irq_friend;
+	bool cc_active;
+
+	struct perf_event *counters[CMN_DT_NUM_COUNTERS];
+	struct perf_event *cycles;
+};
+
+#define CMN_STATE_DISABLED	BIT(0)
+#define CMN_STATE_TXN		BIT(1)
+
+struct arm_cmn {
+	struct device *dev;
+	void __iomem *base;
+	unsigned int state;
+
+	enum cmn_revision rev;
+	enum cmn_part part;
+	u8 mesh_x;
+	u8 mesh_y;
+	u16 num_xps;
+	u16 num_dns;
+	bool multi_dtm;
+	u8 ports_used;
+	struct {
+		unsigned int rsp_vc_num : 2;
+		unsigned int dat_vc_num : 2;
+		unsigned int snp_vc_num : 2;
+		unsigned int req_vc_num : 2;
+	};
+
+	struct arm_cmn_node *xps;
+	struct arm_cmn_node *dns;
+
+	struct arm_cmn_dtm *dtms;
+	struct arm_cmn_dtc *dtc;
+	unsigned int num_dtcs;
+
+	int cpu;
+	struct hlist_node cpuhp_node;
+
+	struct pmu pmu;
+	struct dentry *debug;
+};
+
+#define to_cmn(p)	container_of(p, struct arm_cmn, pmu)
+
+static int arm_cmn_hp_state;
+
+struct arm_cmn_nodeid {
+	u8 x;
+	u8 y;
+	u8 port;
+	u8 dev;
+};
+
+static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
+{
+	return fls((cmn->mesh_x - 1) | (cmn->mesh_y - 1) | 2);
+}
+
+static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id)
+{
+	struct arm_cmn_nodeid nid;
+
+	if (cmn->num_xps == 1) {
+		nid.x = 0;
+		nid.y = 0;
+		nid.port = CMN_NODEID_1x1_PID(id);
+		nid.dev = CMN_NODEID_DEVID(id);
+	} else {
+		int bits = arm_cmn_xyidbits(cmn);
+
+		nid.x = CMN_NODEID_X(id, bits);
+		nid.y = CMN_NODEID_Y(id, bits);
+		if (cmn->ports_used & 0xc) {
+			nid.port = CMN_NODEID_EXT_PID(id);
+			nid.dev = CMN_NODEID_EXT_DEVID(id);
+		} else {
+			nid.port = CMN_NODEID_PID(id);
+			nid.dev = CMN_NODEID_DEVID(id);
+		}
+	}
+	return nid;
+}
+
+static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn,
+					       const struct arm_cmn_node *dn)
+{
+	struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+	int xp_idx = cmn->mesh_x * nid.y + nid.x;
+
+	return cmn->xps + xp_idx;
+}
+static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
+					 enum cmn_node_type type)
+{
+	struct arm_cmn_node *dn;
+
+	for (dn = cmn->dns; dn->type; dn++)
+		if (dn->type == type)
+			return dn;
+	return NULL;
+}
+
+static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn)
+{
+	switch (cmn->part) {
+	case PART_CMN600:
+		return CMN600;
+	case PART_CMN650:
+		return CMN650;
+	case PART_CMN700:
+		return CMN700;
+	case PART_CI700:
+		return CI700;
+	default:
+		return 0;
+	};
+}
+
+static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn,
+				       const struct arm_cmn_node *xp, int port)
+{
+	int offset = CMN_MXP__CONNECT_INFO(port);
+
+	if (port >= 2) {
+		if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650)
+			return 0;
+		/*
+		 * CI-700 may have extra ports, but still has the
+		 * mesh_port_connect_info registers in the way.
+		 */
+		if (cmn->part == PART_CI700)
+			offset += CI700_CONNECT_INFO_P2_5_OFFSET;
+	}
+
+	return readl_relaxed(xp->pmu_base - CMN_PMU_OFFSET + offset);
+}
+
+static struct dentry *arm_cmn_debugfs;
+
+#ifdef CONFIG_DEBUG_FS
+static const char *arm_cmn_device_type(u8 type)
+{
+	switch(FIELD_GET(CMN__CONNECT_INFO_DEVICE_TYPE, type)) {
+		case 0x00: return "        |";
+		case 0x01: return "  RN-I  |";
+		case 0x02: return "  RN-D  |";
+		case 0x04: return " RN-F_B |";
+		case 0x05: return "RN-F_B_E|";
+		case 0x06: return " RN-F_A |";
+		case 0x07: return "RN-F_A_E|";
+		case 0x08: return "  HN-T  |";
+		case 0x09: return "  HN-I  |";
+		case 0x0a: return "  HN-D  |";
+		case 0x0b: return "  HN-P  |";
+		case 0x0c: return "  SN-F  |";
+		case 0x0d: return "  SBSX  |";
+		case 0x0e: return "  HN-F  |";
+		case 0x0f: return " SN-F_E |";
+		case 0x10: return " SN-F_D |";
+		case 0x11: return "  CXHA  |";
+		case 0x12: return "  CXRA  |";
+		case 0x13: return "  CXRH  |";
+		case 0x14: return " RN-F_D |";
+		case 0x15: return "RN-F_D_E|";
+		case 0x16: return " RN-F_C |";
+		case 0x17: return "RN-F_C_E|";
+		case 0x18: return " RN-F_E |";
+		case 0x19: return "RN-F_E_E|";
+		case 0x1c: return "  MTSX  |";
+		case 0x1d: return "  HN-V  |";
+		case 0x1e: return "  CCG   |";
+		default:   return "  ????  |";
+	}
+}
+
+static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d)
+{
+	struct arm_cmn *cmn = s->private;
+	struct arm_cmn_node *dn;
+
+	for (dn = cmn->dns; dn->type; dn++) {
+		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+
+		if (dn->type == CMN_TYPE_XP)
+			continue;
+		/* Ignore the extra components that will overlap on some ports */
+		if (dn->type < CMN_TYPE_HNI)
+			continue;
+
+		if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d)
+			continue;
+
+		seq_printf(s, "   #%-2d  |", dn->logid);
+		return;
+	}
+	seq_puts(s, "        |");
+}
+
+static int arm_cmn_map_show(struct seq_file *s, void *data)
+{
+	struct arm_cmn *cmn = s->private;
+	int x, y, p, pmax = fls(cmn->ports_used);
+
+	seq_puts(s, "     X");
+	for (x = 0; x < cmn->mesh_x; x++)
+		seq_printf(s, "    %d    ", x);
+	seq_puts(s, "\nY P D+");
+	y = cmn->mesh_y;
+	while (y--) {
+		int xp_base = cmn->mesh_x * y;
+		u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION];
+
+		for (x = 0; x < cmn->mesh_x; x++)
+			seq_puts(s, "--------+");
+
+		seq_printf(s, "\n%d    |", y);
+		for (x = 0; x < cmn->mesh_x; x++) {
+			struct arm_cmn_node *xp = cmn->xps + xp_base + x;
+
+			for (p = 0; p < CMN_MAX_PORTS; p++)
+				port[p][x] = arm_cmn_device_connect_info(cmn, xp, p);
+			seq_printf(s, " XP #%-2d |", xp_base + x);
+		}
+
+		seq_puts(s, "\n     |");
+		for (x = 0; x < cmn->mesh_x; x++) {
+			u8 dtc = cmn->xps[xp_base + x].dtc;
+
+			if (dtc & (dtc - 1))
+				seq_puts(s, " DTC ?? |");
+			else
+				seq_printf(s, " DTC %ld  |", __ffs(dtc));
+		}
+		seq_puts(s, "\n     |");
+		for (x = 0; x < cmn->mesh_x; x++)
+			seq_puts(s, "........|");
+
+		for (p = 0; p < pmax; p++) {
+			seq_printf(s, "\n  %d  |", p);
+			for (x = 0; x < cmn->mesh_x; x++)
+				seq_puts(s, arm_cmn_device_type(port[p][x]));
+			seq_puts(s, "\n    0|");
+			for (x = 0; x < cmn->mesh_x; x++)
+				arm_cmn_show_logid(s, x, y, p, 0);
+			seq_puts(s, "\n    1|");
+			for (x = 0; x < cmn->mesh_x; x++)
+				arm_cmn_show_logid(s, x, y, p, 1);
+		}
+		seq_puts(s, "\n-----+");
+	}
+	for (x = 0; x < cmn->mesh_x; x++)
+		seq_puts(s, "--------+");
+	seq_puts(s, "\n");
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(arm_cmn_map);
+
+static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id)
+{
+	const char *name  = "map";
+
+	if (id > 0)
+		name = devm_kasprintf(cmn->dev, GFP_KERNEL, "map_%d", id);
+	if (!name)
+		return;
+
+	cmn->debug = debugfs_create_file(name, 0444, arm_cmn_debugfs, cmn, &arm_cmn_map_fops);
+}
+#else
+static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
+#endif
+
+struct arm_cmn_hw_event {
+	struct arm_cmn_node *dn;
+	u64 dtm_idx[4];
+	unsigned int dtc_idx;
+	u8 dtcs_used;
+	u8 num_dns;
+	u8 dtm_offset;
+	bool wide_sel;
+	enum cmn_filter_select filter_sel;
+};
+
+#define for_each_hw_dn(hw, dn, i) \
+	for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
+
+static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
+{
+	BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
+	return (struct arm_cmn_hw_event *)&event->hw;
+}
+
+static void arm_cmn_set_index(u64 x[], unsigned int pos, unsigned int val)
+{
+	x[pos / 32] |= (u64)val << ((pos % 32) * 2);
+}
+
+static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos)
+{
+	return (x[pos / 32] >> ((pos % 32) * 2)) & 3;
+}
+
+struct arm_cmn_event_attr {
+	struct device_attribute attr;
+	enum cmn_model model;
+	enum cmn_node_type type;
+	enum cmn_filter_select fsel;
+	u16 eventid;
+	u8 occupid;
+};
+
+struct arm_cmn_format_attr {
+	struct device_attribute attr;
+	u64 field;
+	int config;
+};
+
+#define _CMN_EVENT_ATTR(_model, _name, _type, _eventid, _occupid, _fsel)\
+	(&((struct arm_cmn_event_attr[]) {{				\
+		.attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL),	\
+		.model = _model,					\
+		.type = _type,						\
+		.eventid = _eventid,					\
+		.occupid = _occupid,					\
+		.fsel = _fsel,						\
+	}})[0].attr.attr)
+#define CMN_EVENT_ATTR(_model, _name, _type, _eventid)			\
+	_CMN_EVENT_ATTR(_model, _name, _type, _eventid, 0, SEL_NONE)
+
+static ssize_t arm_cmn_event_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct arm_cmn_event_attr *eattr;
+
+	eattr = container_of(attr, typeof(*eattr), attr);
+
+	if (eattr->type == CMN_TYPE_DTC)
+		return sysfs_emit(buf, "type=0x%x\n", eattr->type);
+
+	if (eattr->type == CMN_TYPE_WP)
+		return sysfs_emit(buf,
+				  "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n",
+				  eattr->type, eattr->eventid);
+
+	if (eattr->fsel > SEL_NONE)
+		return sysfs_emit(buf, "type=0x%x,eventid=0x%x,occupid=0x%x\n",
+				  eattr->type, eattr->eventid, eattr->occupid);
+
+	return sysfs_emit(buf, "type=0x%x,eventid=0x%x\n", eattr->type,
+			  eattr->eventid);
+}
+
+static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
+					     struct attribute *attr,
+					     int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+	struct arm_cmn_event_attr *eattr;
+	enum cmn_node_type type;
+	u16 eventid;
+
+	eattr = container_of(attr, typeof(*eattr), attr.attr);
+
+	if (!(eattr->model & arm_cmn_model(cmn)))
+		return 0;
+
+	type = eattr->type;
+	eventid = eattr->eventid;
+
+	/* Watchpoints aren't nodes, so avoid confusion */
+	if (type == CMN_TYPE_WP)
+		return attr->mode;
+
+	/* Hide XP events for unused interfaces/channels */
+	if (type == CMN_TYPE_XP) {
+		unsigned int intf = (eventid >> 2) & 7;
+		unsigned int chan = eventid >> 5;
+
+		if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3)))
+			return 0;
+
+		if (chan == 4 && cmn->part == PART_CMN600)
+			return 0;
+
+		if ((chan == 5 && cmn->rsp_vc_num < 2) ||
+		    (chan == 6 && cmn->dat_vc_num < 2) ||
+		    (chan == 7 && cmn->snp_vc_num < 2) ||
+		    (chan == 8 && cmn->req_vc_num < 2))
+			return 0;
+	}
+
+	/* Revision-specific differences */
+	if (cmn->part == PART_CMN600) {
+		if (cmn->rev < REV_CMN600_R1P3) {
+			if (type == CMN_TYPE_CXRA && eventid > 0x10)
+				return 0;
+		}
+		if (cmn->rev < REV_CMN600_R1P2) {
+			if (type == CMN_TYPE_HNF && eventid == 0x1b)
+				return 0;
+			if (type == CMN_TYPE_CXRA || type == CMN_TYPE_CXHA)
+				return 0;
+		}
+	} else if (cmn->part == PART_CMN650) {
+		if (cmn->rev < REV_CMN650_R2P0 || cmn->rev == REV_CMN650_R1P2) {
+			if (type == CMN_TYPE_HNF && eventid > 0x22)
+				return 0;
+			if (type == CMN_TYPE_SBSX && eventid == 0x17)
+				return 0;
+			if (type == CMN_TYPE_RNI && eventid > 0x10)
+				return 0;
+		}
+	} else if (cmn->part == PART_CMN700) {
+		if (cmn->rev < REV_CMN700_R2P0) {
+			if (type == CMN_TYPE_HNF && eventid > 0x2c)
+				return 0;
+			if (type == CMN_TYPE_CCHA && eventid > 0x74)
+				return 0;
+			if (type == CMN_TYPE_CCLA && eventid > 0x27)
+				return 0;
+		}
+		if (cmn->rev < REV_CMN700_R1P0) {
+			if (type == CMN_TYPE_HNF && eventid > 0x2b)
+				return 0;
+		}
+	}
+
+	if (!arm_cmn_node(cmn, type))
+		return 0;
+
+	return attr->mode;
+}
+
+#define _CMN_EVENT_DVM(_model, _name, _event, _occup, _fsel)	\
+	_CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup, _fsel)
+#define CMN_EVENT_DTC(_name)					\
+	CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0)
+#define CMN_EVENT_HNF(_model, _name, _event)			\
+	CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event)
+#define CMN_EVENT_HNI(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event)
+#define CMN_EVENT_HNP(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, hnp_##_name, CMN_TYPE_HNP, _event)
+#define __CMN_EVENT_XP(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, mxp_##_name, CMN_TYPE_XP, _event)
+#define CMN_EVENT_SBSX(_model, _name, _event)			\
+	CMN_EVENT_ATTR(_model, sbsx_##_name, CMN_TYPE_SBSX, _event)
+#define CMN_EVENT_RNID(_model, _name, _event)			\
+	CMN_EVENT_ATTR(_model, rnid_##_name, CMN_TYPE_RNI, _event)
+#define CMN_EVENT_MTSX(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, mtsx_##_name, CMN_TYPE_MTSX, _event)
+#define CMN_EVENT_CXRA(_model, _name, _event)				\
+	CMN_EVENT_ATTR(_model, cxra_##_name, CMN_TYPE_CXRA, _event)
+#define CMN_EVENT_CXHA(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, cxha_##_name, CMN_TYPE_CXHA, _event)
+#define CMN_EVENT_CCRA(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, ccra_##_name, CMN_TYPE_CCRA, _event)
+#define CMN_EVENT_CCHA(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, ccha_##_name, CMN_TYPE_CCHA, _event)
+#define CMN_EVENT_CCLA(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
+#define CMN_EVENT_CCLA_RNI(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event)
+#define CMN_EVENT_HNS(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+
+#define CMN_EVENT_DVM(_model, _name, _event)			\
+	_CMN_EVENT_DVM(_model, _name, _event, 0, SEL_NONE)
+#define CMN_EVENT_DVM_OCC(_model, _name, _event)			\
+	_CMN_EVENT_DVM(_model, _name##_all, _event, 0, SEL_OCCUP1ID),	\
+	_CMN_EVENT_DVM(_model, _name##_dvmop, _event, 1, SEL_OCCUP1ID),	\
+	_CMN_EVENT_DVM(_model, _name##_dvmsync, _event, 2, SEL_OCCUP1ID)
+
+#define CMN_EVENT_HN_OCC(_model, _name, _type, _event)		\
+	_CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 1, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 2, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(_model, _name##_atomic, _type, _event, 3, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(_model, _name##_stash, _type, _event, 4, SEL_OCCUP1ID)
+#define CMN_EVENT_HN_CLS(_model, _name, _type, _event)			\
+	_CMN_EVENT_ATTR(_model, _name##_class0, _type, _event, 0, SEL_CLASS_OCCUP_ID), \
+	_CMN_EVENT_ATTR(_model, _name##_class1, _type, _event, 1, SEL_CLASS_OCCUP_ID), \
+	_CMN_EVENT_ATTR(_model, _name##_class2, _type, _event, 2, SEL_CLASS_OCCUP_ID), \
+	_CMN_EVENT_ATTR(_model, _name##_class3, _type, _event, 3, SEL_CLASS_OCCUP_ID)
+#define CMN_EVENT_HN_SNT(_model, _name, _type, _event)			\
+	_CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_group0_read, _type, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_group0_write, _type, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_group1_read, _type, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_group1_write, _type, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
+
+#define CMN_EVENT_HNF_OCC(_model, _name, _event)			\
+	CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event)
+#define CMN_EVENT_HNF_CLS(_model, _name, _event)			\
+	CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNF, _event)
+#define CMN_EVENT_HNF_SNT(_model, _name, _event)			\
+	CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event)
+
+#define CMN_EVENT_HNS_OCC(_name, _event)				\
+	CMN_EVENT_HN_OCC(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event),	\
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_rxsnp, CMN_TYPE_HNS, _event, 5, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 6, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 7, SEL_OCCUP1ID)
+#define CMN_EVENT_HNS_CLS( _name, _event)				\
+	CMN_EVENT_HN_CLS(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_SNT(_name, _event)				\
+	CMN_EVENT_HN_SNT(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_HBT(_name, _event)				\
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_HBT_LBT_SEL), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 1, SEL_HBT_LBT_SEL), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 2, SEL_HBT_LBT_SEL)
+#define CMN_EVENT_HNS_SNH(_name, _event)				\
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_SN_HOME_SEL), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_sn, CMN_TYPE_HNS, _event, 1, SEL_SN_HOME_SEL), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_home, CMN_TYPE_HNS, _event, 2, SEL_SN_HOME_SEL)
+
+#define _CMN_EVENT_XP_MESH(_name, _event)			\
+	__CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)),		\
+	__CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)),		\
+	__CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)),		\
+	__CMN_EVENT_XP(s_##_name, (_event) | (3 << 2))
+
+#define _CMN_EVENT_XP_PORT(_name, _event)			\
+	__CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)),	\
+	__CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)),	\
+	__CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)),	\
+	__CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2))
+
+#define _CMN_EVENT_XP(_name, _event)				\
+	_CMN_EVENT_XP_MESH(_name, _event),			\
+	_CMN_EVENT_XP_PORT(_name, _event)
+
+/* Good thing there are only 3 fundamental XP events... */
+#define CMN_EVENT_XP(_name, _event)				\
+	_CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)),	\
+	_CMN_EVENT_XP(rsp_##_name, (_event) | (1 << 5)),	\
+	_CMN_EVENT_XP(snp_##_name, (_event) | (2 << 5)),	\
+	_CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5)),	\
+	_CMN_EVENT_XP(pub_##_name, (_event) | (4 << 5)),	\
+	_CMN_EVENT_XP(rsp2_##_name, (_event) | (5 << 5)),	\
+	_CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5)),	\
+	_CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)),	\
+	_CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5))
+
+#define CMN_EVENT_XP_DAT(_name, _event)				\
+	_CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)),	\
+	_CMN_EVENT_XP_PORT(dat2_##_name, (_event) | (6 << 5))
+
+
+static struct attribute *arm_cmn_event_attrs[] = {
+	CMN_EVENT_DTC(cycles),
+
+	/*
+	 * DVM node events conflict with HN-I events in the equivalent PMU
+	 * slot, but our lazy short-cut of using the DTM counter index for
+	 * the PMU index as well happens to avoid that by construction.
+	 */
+	CMN_EVENT_DVM(CMN600, rxreq_dvmop,		0x01),
+	CMN_EVENT_DVM(CMN600, rxreq_dvmsync,		0x02),
+	CMN_EVENT_DVM(CMN600, rxreq_dvmop_vmid_filtered, 0x03),
+	CMN_EVENT_DVM(CMN600, rxreq_retried,		0x04),
+	CMN_EVENT_DVM_OCC(CMN600, rxreq_trk_occupancy,	0x05),
+	CMN_EVENT_DVM(NOT_CMN600, dvmop_tlbi,		0x01),
+	CMN_EVENT_DVM(NOT_CMN600, dvmop_bpi,		0x02),
+	CMN_EVENT_DVM(NOT_CMN600, dvmop_pici,		0x03),
+	CMN_EVENT_DVM(NOT_CMN600, dvmop_vici,		0x04),
+	CMN_EVENT_DVM(NOT_CMN600, dvmsync,		0x05),
+	CMN_EVENT_DVM(NOT_CMN600, vmid_filtered,	0x06),
+	CMN_EVENT_DVM(NOT_CMN600, rndop_filtered,	0x07),
+	CMN_EVENT_DVM(NOT_CMN600, retry,		0x08),
+	CMN_EVENT_DVM(NOT_CMN600, txsnp_flitv,		0x09),
+	CMN_EVENT_DVM(NOT_CMN600, txsnp_stall,		0x0a),
+	CMN_EVENT_DVM(NOT_CMN600, trkfull,		0x0b),
+	CMN_EVENT_DVM_OCC(NOT_CMN600, trk_occupancy,	0x0c),
+	CMN_EVENT_DVM_OCC(CMN700, trk_occupancy_cxha,	0x0d),
+	CMN_EVENT_DVM_OCC(CMN700, trk_occupancy_pdn,	0x0e),
+	CMN_EVENT_DVM(CMN700, trk_alloc,		0x0f),
+	CMN_EVENT_DVM(CMN700, trk_cxha_alloc,		0x10),
+	CMN_EVENT_DVM(CMN700, trk_pdn_alloc,		0x11),
+	CMN_EVENT_DVM(CMN700, txsnp_stall_limit,	0x12),
+	CMN_EVENT_DVM(CMN700, rxsnp_stall_starv,	0x13),
+	CMN_EVENT_DVM(CMN700, txsnp_sync_stall_op,	0x14),
+
+	CMN_EVENT_HNF(CMN_ANY, cache_miss,		0x01),
+	CMN_EVENT_HNF(CMN_ANY, slc_sf_cache_access,	0x02),
+	CMN_EVENT_HNF(CMN_ANY, cache_fill,		0x03),
+	CMN_EVENT_HNF(CMN_ANY, pocq_retry,		0x04),
+	CMN_EVENT_HNF(CMN_ANY, pocq_reqs_recvd,		0x05),
+	CMN_EVENT_HNF(CMN_ANY, sf_hit,			0x06),
+	CMN_EVENT_HNF(CMN_ANY, sf_evictions,		0x07),
+	CMN_EVENT_HNF(CMN_ANY, dir_snoops_sent,		0x08),
+	CMN_EVENT_HNF(CMN_ANY, brd_snoops_sent,		0x09),
+	CMN_EVENT_HNF(CMN_ANY, slc_eviction,		0x0a),
+	CMN_EVENT_HNF(CMN_ANY, slc_fill_invalid_way,	0x0b),
+	CMN_EVENT_HNF(CMN_ANY, mc_retries,		0x0c),
+	CMN_EVENT_HNF(CMN_ANY, mc_reqs,			0x0d),
+	CMN_EVENT_HNF(CMN_ANY, qos_hh_retry,		0x0e),
+	CMN_EVENT_HNF_OCC(CMN_ANY, qos_pocq_occupancy,	0x0f),
+	CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz,		0x10),
+	CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz,	0x11),
+	CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full,	0x12),
+	CMN_EVENT_HNF(CMN_ANY, cmp_adq_full,		0x13),
+	CMN_EVENT_HNF(CMN_ANY, txdat_stall,		0x14),
+	CMN_EVENT_HNF(CMN_ANY, txrsp_stall,		0x15),
+	CMN_EVENT_HNF(CMN_ANY, seq_full,		0x16),
+	CMN_EVENT_HNF(CMN_ANY, seq_hit,			0x17),
+	CMN_EVENT_HNF(CMN_ANY, snp_sent,		0x18),
+	CMN_EVENT_HNF(CMN_ANY, sfbi_dir_snp_sent,	0x19),
+	CMN_EVENT_HNF(CMN_ANY, sfbi_brd_snp_sent,	0x1a),
+	CMN_EVENT_HNF(CMN_ANY, snp_sent_untrk,		0x1b),
+	CMN_EVENT_HNF(CMN_ANY, intv_dirty,		0x1c),
+	CMN_EVENT_HNF(CMN_ANY, stash_snp_sent,		0x1d),
+	CMN_EVENT_HNF(CMN_ANY, stash_data_pull,		0x1e),
+	CMN_EVENT_HNF(CMN_ANY, snp_fwded,		0x1f),
+	CMN_EVENT_HNF(NOT_CMN600, atomic_fwd,		0x20),
+	CMN_EVENT_HNF(NOT_CMN600, mpam_hardlim,		0x21),
+	CMN_EVENT_HNF(NOT_CMN600, mpam_softlim,		0x22),
+	CMN_EVENT_HNF(CMN_650ON, snp_sent_cluster,	0x23),
+	CMN_EVENT_HNF(CMN_650ON, sf_imprecise_evict,	0x24),
+	CMN_EVENT_HNF(CMN_650ON, sf_evict_shared_line,	0x25),
+	CMN_EVENT_HNF_CLS(CMN700, pocq_class_occup,	0x26),
+	CMN_EVENT_HNF_CLS(CMN700, pocq_class_retry,	0x27),
+	CMN_EVENT_HNF_CLS(CMN700, class_mc_reqs,	0x28),
+	CMN_EVENT_HNF_CLS(CMN700, class_cgnt_cmin,	0x29),
+	CMN_EVENT_HNF_SNT(CMN700, sn_throttle,		0x2a),
+	CMN_EVENT_HNF_SNT(CMN700, sn_throttle_min,	0x2b),
+	CMN_EVENT_HNF(CMN700, sf_precise_to_imprecise,	0x2c),
+	CMN_EVENT_HNF(CMN700, snp_intv_cln,		0x2d),
+	CMN_EVENT_HNF(CMN700, nc_excl,			0x2e),
+	CMN_EVENT_HNF(CMN700, excl_mon_ovfl,		0x2f),
+
+	CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl,		0x20),
+	CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl,		0x21),
+	CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl,		0x22),
+	CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl,		0x23),
+	CMN_EVENT_HNI(wdb_occ_cnt_ovfl,			0x24),
+	CMN_EVENT_HNI(rrt_rd_alloc,			0x25),
+	CMN_EVENT_HNI(rrt_wr_alloc,			0x26),
+	CMN_EVENT_HNI(rdt_rd_alloc,			0x27),
+	CMN_EVENT_HNI(rdt_wr_alloc,			0x28),
+	CMN_EVENT_HNI(wdb_alloc,			0x29),
+	CMN_EVENT_HNI(txrsp_retryack,			0x2a),
+	CMN_EVENT_HNI(arvalid_no_arready,		0x2b),
+	CMN_EVENT_HNI(arready_no_arvalid,		0x2c),
+	CMN_EVENT_HNI(awvalid_no_awready,		0x2d),
+	CMN_EVENT_HNI(awready_no_awvalid,		0x2e),
+	CMN_EVENT_HNI(wvalid_no_wready,			0x2f),
+	CMN_EVENT_HNI(txdat_stall,			0x30),
+	CMN_EVENT_HNI(nonpcie_serialization,		0x31),
+	CMN_EVENT_HNI(pcie_serialization,		0x32),
+
+	/*
+	 * HN-P events squat on top of the HN-I similarly to DVM events, except
+	 * for being crammed into the same physical node as well. And of course
+	 * where would the fun be if the same events were in the same order...
+	 */
+	CMN_EVENT_HNP(rrt_wr_occ_cnt_ovfl,		0x01),
+	CMN_EVENT_HNP(rdt_wr_occ_cnt_ovfl,		0x02),
+	CMN_EVENT_HNP(wdb_occ_cnt_ovfl,			0x03),
+	CMN_EVENT_HNP(rrt_wr_alloc,			0x04),
+	CMN_EVENT_HNP(rdt_wr_alloc,			0x05),
+	CMN_EVENT_HNP(wdb_alloc,			0x06),
+	CMN_EVENT_HNP(awvalid_no_awready,		0x07),
+	CMN_EVENT_HNP(awready_no_awvalid,		0x08),
+	CMN_EVENT_HNP(wvalid_no_wready,			0x09),
+	CMN_EVENT_HNP(rrt_rd_occ_cnt_ovfl,		0x11),
+	CMN_EVENT_HNP(rdt_rd_occ_cnt_ovfl,		0x12),
+	CMN_EVENT_HNP(rrt_rd_alloc,			0x13),
+	CMN_EVENT_HNP(rdt_rd_alloc,			0x14),
+	CMN_EVENT_HNP(arvalid_no_arready,		0x15),
+	CMN_EVENT_HNP(arready_no_arvalid,		0x16),
+
+	CMN_EVENT_XP(txflit_valid,			0x01),
+	CMN_EVENT_XP(txflit_stall,			0x02),
+	CMN_EVENT_XP_DAT(partial_dat_flit,		0x03),
+	/* We treat watchpoints as a special made-up class of XP events */
+	CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP),
+	CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN),
+
+	CMN_EVENT_SBSX(CMN_ANY, rd_req,			0x01),
+	CMN_EVENT_SBSX(CMN_ANY, wr_req,			0x02),
+	CMN_EVENT_SBSX(CMN_ANY, cmo_req,		0x03),
+	CMN_EVENT_SBSX(CMN_ANY, txrsp_retryack,		0x04),
+	CMN_EVENT_SBSX(CMN_ANY, txdat_flitv,		0x05),
+	CMN_EVENT_SBSX(CMN_ANY, txrsp_flitv,		0x06),
+	CMN_EVENT_SBSX(CMN_ANY, rd_req_trkr_occ_cnt_ovfl, 0x11),
+	CMN_EVENT_SBSX(CMN_ANY, wr_req_trkr_occ_cnt_ovfl, 0x12),
+	CMN_EVENT_SBSX(CMN_ANY, cmo_req_trkr_occ_cnt_ovfl, 0x13),
+	CMN_EVENT_SBSX(CMN_ANY, wdb_occ_cnt_ovfl,	0x14),
+	CMN_EVENT_SBSX(CMN_ANY, rd_axi_trkr_occ_cnt_ovfl, 0x15),
+	CMN_EVENT_SBSX(CMN_ANY, cmo_axi_trkr_occ_cnt_ovfl, 0x16),
+	CMN_EVENT_SBSX(NOT_CMN600, rdb_occ_cnt_ovfl,	0x17),
+	CMN_EVENT_SBSX(CMN_ANY, arvalid_no_arready,	0x21),
+	CMN_EVENT_SBSX(CMN_ANY, awvalid_no_awready,	0x22),
+	CMN_EVENT_SBSX(CMN_ANY, wvalid_no_wready,	0x23),
+	CMN_EVENT_SBSX(CMN_ANY, txdat_stall,		0x24),
+	CMN_EVENT_SBSX(CMN_ANY, txrsp_stall,		0x25),
+
+	CMN_EVENT_RNID(CMN_ANY, s0_rdata_beats,		0x01),
+	CMN_EVENT_RNID(CMN_ANY, s1_rdata_beats,		0x02),
+	CMN_EVENT_RNID(CMN_ANY, s2_rdata_beats,		0x03),
+	CMN_EVENT_RNID(CMN_ANY, rxdat_flits,		0x04),
+	CMN_EVENT_RNID(CMN_ANY, txdat_flits,		0x05),
+	CMN_EVENT_RNID(CMN_ANY, txreq_flits_total,	0x06),
+	CMN_EVENT_RNID(CMN_ANY, txreq_flits_retried,	0x07),
+	CMN_EVENT_RNID(CMN_ANY, rrt_occ_ovfl,		0x08),
+	CMN_EVENT_RNID(CMN_ANY, wrt_occ_ovfl,		0x09),
+	CMN_EVENT_RNID(CMN_ANY, txreq_flits_replayed,	0x0a),
+	CMN_EVENT_RNID(CMN_ANY, wrcancel_sent,		0x0b),
+	CMN_EVENT_RNID(CMN_ANY, s0_wdata_beats,		0x0c),
+	CMN_EVENT_RNID(CMN_ANY, s1_wdata_beats,		0x0d),
+	CMN_EVENT_RNID(CMN_ANY, s2_wdata_beats,		0x0e),
+	CMN_EVENT_RNID(CMN_ANY, rrt_alloc,		0x0f),
+	CMN_EVENT_RNID(CMN_ANY, wrt_alloc,		0x10),
+	CMN_EVENT_RNID(CMN600, rdb_unord,		0x11),
+	CMN_EVENT_RNID(CMN600, rdb_replay,		0x12),
+	CMN_EVENT_RNID(CMN600, rdb_hybrid,		0x13),
+	CMN_EVENT_RNID(CMN600, rdb_ord,			0x14),
+	CMN_EVENT_RNID(NOT_CMN600, padb_occ_ovfl,	0x11),
+	CMN_EVENT_RNID(NOT_CMN600, rpdb_occ_ovfl,	0x12),
+	CMN_EVENT_RNID(NOT_CMN600, rrt_occup_ovfl_slice1, 0x13),
+	CMN_EVENT_RNID(NOT_CMN600, rrt_occup_ovfl_slice2, 0x14),
+	CMN_EVENT_RNID(NOT_CMN600, rrt_occup_ovfl_slice3, 0x15),
+	CMN_EVENT_RNID(NOT_CMN600, wrt_throttled,	0x16),
+	CMN_EVENT_RNID(CMN700, ldb_full,		0x17),
+	CMN_EVENT_RNID(CMN700, rrt_rd_req_occup_ovfl_slice0, 0x18),
+	CMN_EVENT_RNID(CMN700, rrt_rd_req_occup_ovfl_slice1, 0x19),
+	CMN_EVENT_RNID(CMN700, rrt_rd_req_occup_ovfl_slice2, 0x1a),
+	CMN_EVENT_RNID(CMN700, rrt_rd_req_occup_ovfl_slice3, 0x1b),
+	CMN_EVENT_RNID(CMN700, rrt_burst_occup_ovfl_slice0, 0x1c),
+	CMN_EVENT_RNID(CMN700, rrt_burst_occup_ovfl_slice1, 0x1d),
+	CMN_EVENT_RNID(CMN700, rrt_burst_occup_ovfl_slice2, 0x1e),
+	CMN_EVENT_RNID(CMN700, rrt_burst_occup_ovfl_slice3, 0x1f),
+	CMN_EVENT_RNID(CMN700, rrt_burst_alloc,		0x20),
+	CMN_EVENT_RNID(CMN700, awid_hash,		0x21),
+	CMN_EVENT_RNID(CMN700, atomic_alloc,		0x22),
+	CMN_EVENT_RNID(CMN700, atomic_occ_ovfl,		0x23),
+
+	CMN_EVENT_MTSX(tc_lookup,			0x01),
+	CMN_EVENT_MTSX(tc_fill,				0x02),
+	CMN_EVENT_MTSX(tc_miss,				0x03),
+	CMN_EVENT_MTSX(tdb_forward,			0x04),
+	CMN_EVENT_MTSX(tcq_hazard,			0x05),
+	CMN_EVENT_MTSX(tcq_rd_alloc,			0x06),
+	CMN_EVENT_MTSX(tcq_wr_alloc,			0x07),
+	CMN_EVENT_MTSX(tcq_cmo_alloc,			0x08),
+	CMN_EVENT_MTSX(axi_rd_req,			0x09),
+	CMN_EVENT_MTSX(axi_wr_req,			0x0a),
+	CMN_EVENT_MTSX(tcq_occ_cnt_ovfl,		0x0b),
+	CMN_EVENT_MTSX(tdb_occ_cnt_ovfl,		0x0c),
+
+	CMN_EVENT_CXRA(CMN_ANY, rht_occ,		0x01),
+	CMN_EVENT_CXRA(CMN_ANY, sht_occ,		0x02),
+	CMN_EVENT_CXRA(CMN_ANY, rdb_occ,		0x03),
+	CMN_EVENT_CXRA(CMN_ANY, wdb_occ,		0x04),
+	CMN_EVENT_CXRA(CMN_ANY, ssb_occ,		0x05),
+	CMN_EVENT_CXRA(CMN_ANY, snp_bcasts,		0x06),
+	CMN_EVENT_CXRA(CMN_ANY, req_chains,		0x07),
+	CMN_EVENT_CXRA(CMN_ANY, req_chain_avglen,	0x08),
+	CMN_EVENT_CXRA(CMN_ANY, chirsp_stalls,		0x09),
+	CMN_EVENT_CXRA(CMN_ANY, chidat_stalls,		0x0a),
+	CMN_EVENT_CXRA(CMN_ANY, cxreq_pcrd_stalls_link0, 0x0b),
+	CMN_EVENT_CXRA(CMN_ANY, cxreq_pcrd_stalls_link1, 0x0c),
+	CMN_EVENT_CXRA(CMN_ANY, cxreq_pcrd_stalls_link2, 0x0d),
+	CMN_EVENT_CXRA(CMN_ANY, cxdat_pcrd_stalls_link0, 0x0e),
+	CMN_EVENT_CXRA(CMN_ANY, cxdat_pcrd_stalls_link1, 0x0f),
+	CMN_EVENT_CXRA(CMN_ANY, cxdat_pcrd_stalls_link2, 0x10),
+	CMN_EVENT_CXRA(CMN_ANY, external_chirsp_stalls,	0x11),
+	CMN_EVENT_CXRA(CMN_ANY, external_chidat_stalls,	0x12),
+	CMN_EVENT_CXRA(NOT_CMN600, cxmisc_pcrd_stalls_link0, 0x13),
+	CMN_EVENT_CXRA(NOT_CMN600, cxmisc_pcrd_stalls_link1, 0x14),
+	CMN_EVENT_CXRA(NOT_CMN600, cxmisc_pcrd_stalls_link2, 0x15),
+
+	CMN_EVENT_CXHA(rddatbyp,			0x21),
+	CMN_EVENT_CXHA(chirsp_up_stall,			0x22),
+	CMN_EVENT_CXHA(chidat_up_stall,			0x23),
+	CMN_EVENT_CXHA(snppcrd_link0_stall,		0x24),
+	CMN_EVENT_CXHA(snppcrd_link1_stall,		0x25),
+	CMN_EVENT_CXHA(snppcrd_link2_stall,		0x26),
+	CMN_EVENT_CXHA(reqtrk_occ,			0x27),
+	CMN_EVENT_CXHA(rdb_occ,				0x28),
+	CMN_EVENT_CXHA(rdbyp_occ,			0x29),
+	CMN_EVENT_CXHA(wdb_occ,				0x2a),
+	CMN_EVENT_CXHA(snptrk_occ,			0x2b),
+	CMN_EVENT_CXHA(sdb_occ,				0x2c),
+	CMN_EVENT_CXHA(snphaz_occ,			0x2d),
+
+	CMN_EVENT_CCRA(rht_occ,				0x41),
+	CMN_EVENT_CCRA(sht_occ,				0x42),
+	CMN_EVENT_CCRA(rdb_occ,				0x43),
+	CMN_EVENT_CCRA(wdb_occ,				0x44),
+	CMN_EVENT_CCRA(ssb_occ,				0x45),
+	CMN_EVENT_CCRA(snp_bcasts,			0x46),
+	CMN_EVENT_CCRA(req_chains,			0x47),
+	CMN_EVENT_CCRA(req_chain_avglen,		0x48),
+	CMN_EVENT_CCRA(chirsp_stalls,			0x49),
+	CMN_EVENT_CCRA(chidat_stalls,			0x4a),
+	CMN_EVENT_CCRA(cxreq_pcrd_stalls_link0,		0x4b),
+	CMN_EVENT_CCRA(cxreq_pcrd_stalls_link1,		0x4c),
+	CMN_EVENT_CCRA(cxreq_pcrd_stalls_link2,		0x4d),
+	CMN_EVENT_CCRA(cxdat_pcrd_stalls_link0,		0x4e),
+	CMN_EVENT_CCRA(cxdat_pcrd_stalls_link1,		0x4f),
+	CMN_EVENT_CCRA(cxdat_pcrd_stalls_link2,		0x50),
+	CMN_EVENT_CCRA(external_chirsp_stalls,		0x51),
+	CMN_EVENT_CCRA(external_chidat_stalls,		0x52),
+	CMN_EVENT_CCRA(cxmisc_pcrd_stalls_link0,	0x53),
+	CMN_EVENT_CCRA(cxmisc_pcrd_stalls_link1,	0x54),
+	CMN_EVENT_CCRA(cxmisc_pcrd_stalls_link2,	0x55),
+	CMN_EVENT_CCRA(rht_alloc,			0x56),
+	CMN_EVENT_CCRA(sht_alloc,			0x57),
+	CMN_EVENT_CCRA(rdb_alloc,			0x58),
+	CMN_EVENT_CCRA(wdb_alloc,			0x59),
+	CMN_EVENT_CCRA(ssb_alloc,			0x5a),
+
+	CMN_EVENT_CCHA(rddatbyp,			0x61),
+	CMN_EVENT_CCHA(chirsp_up_stall,			0x62),
+	CMN_EVENT_CCHA(chidat_up_stall,			0x63),
+	CMN_EVENT_CCHA(snppcrd_link0_stall,		0x64),
+	CMN_EVENT_CCHA(snppcrd_link1_stall,		0x65),
+	CMN_EVENT_CCHA(snppcrd_link2_stall,		0x66),
+	CMN_EVENT_CCHA(reqtrk_occ,			0x67),
+	CMN_EVENT_CCHA(rdb_occ,				0x68),
+	CMN_EVENT_CCHA(rdbyp_occ,			0x69),
+	CMN_EVENT_CCHA(wdb_occ,				0x6a),
+	CMN_EVENT_CCHA(snptrk_occ,			0x6b),
+	CMN_EVENT_CCHA(sdb_occ,				0x6c),
+	CMN_EVENT_CCHA(snphaz_occ,			0x6d),
+	CMN_EVENT_CCHA(reqtrk_alloc,			0x6e),
+	CMN_EVENT_CCHA(rdb_alloc,			0x6f),
+	CMN_EVENT_CCHA(rdbyp_alloc,			0x70),
+	CMN_EVENT_CCHA(wdb_alloc,			0x71),
+	CMN_EVENT_CCHA(snptrk_alloc,			0x72),
+	CMN_EVENT_CCHA(sdb_alloc,			0x73),
+	CMN_EVENT_CCHA(snphaz_alloc,			0x74),
+	CMN_EVENT_CCHA(pb_rhu_req_occ,			0x75),
+	CMN_EVENT_CCHA(pb_rhu_req_alloc,		0x76),
+	CMN_EVENT_CCHA(pb_rhu_pcie_req_occ,		0x77),
+	CMN_EVENT_CCHA(pb_rhu_pcie_req_alloc,		0x78),
+	CMN_EVENT_CCHA(pb_pcie_wr_req_occ,		0x79),
+	CMN_EVENT_CCHA(pb_pcie_wr_req_alloc,		0x7a),
+	CMN_EVENT_CCHA(pb_pcie_reg_req_occ,		0x7b),
+	CMN_EVENT_CCHA(pb_pcie_reg_req_alloc,		0x7c),
+	CMN_EVENT_CCHA(pb_pcie_rsvd_req_occ,		0x7d),
+	CMN_EVENT_CCHA(pb_pcie_rsvd_req_alloc,		0x7e),
+	CMN_EVENT_CCHA(pb_rhu_dat_occ,			0x7f),
+	CMN_EVENT_CCHA(pb_rhu_dat_alloc,		0x80),
+	CMN_EVENT_CCHA(pb_rhu_pcie_dat_occ,		0x81),
+	CMN_EVENT_CCHA(pb_rhu_pcie_dat_alloc,		0x82),
+	CMN_EVENT_CCHA(pb_pcie_wr_dat_occ,		0x83),
+	CMN_EVENT_CCHA(pb_pcie_wr_dat_alloc,		0x84),
+
+	CMN_EVENT_CCLA(rx_cxs,				0x21),
+	CMN_EVENT_CCLA(tx_cxs,				0x22),
+	CMN_EVENT_CCLA(rx_cxs_avg_size,			0x23),
+	CMN_EVENT_CCLA(tx_cxs_avg_size,			0x24),
+	CMN_EVENT_CCLA(tx_cxs_lcrd_backpressure,	0x25),
+	CMN_EVENT_CCLA(link_crdbuf_occ,			0x26),
+	CMN_EVENT_CCLA(link_crdbuf_alloc,		0x27),
+	CMN_EVENT_CCLA(pfwd_rcvr_cxs,			0x28),
+	CMN_EVENT_CCLA(pfwd_sndr_num_flits,		0x29),
+	CMN_EVENT_CCLA(pfwd_sndr_stalls_static_crd,	0x2a),
+	CMN_EVENT_CCLA(pfwd_sndr_stalls_dynmaic_crd,	0x2b),
+
+	CMN_EVENT_HNS_HBT(cache_miss,			0x01),
+	CMN_EVENT_HNS_HBT(slc_sf_cache_access,		0x02),
+	CMN_EVENT_HNS_HBT(cache_fill,			0x03),
+	CMN_EVENT_HNS_HBT(pocq_retry,			0x04),
+	CMN_EVENT_HNS_HBT(pocq_reqs_recvd,		0x05),
+	CMN_EVENT_HNS_HBT(sf_hit,			0x06),
+	CMN_EVENT_HNS_HBT(sf_evictions,			0x07),
+	CMN_EVENT_HNS(dir_snoops_sent,			0x08),
+	CMN_EVENT_HNS(brd_snoops_sent,			0x09),
+	CMN_EVENT_HNS_HBT(slc_eviction,			0x0a),
+	CMN_EVENT_HNS_HBT(slc_fill_invalid_way,		0x0b),
+	CMN_EVENT_HNS(mc_retries_local,			0x0c),
+	CMN_EVENT_HNS_SNH(mc_reqs_local,		0x0d),
+	CMN_EVENT_HNS(qos_hh_retry,			0x0e),
+	CMN_EVENT_HNS_OCC(qos_pocq_occupancy,		0x0f),
+	CMN_EVENT_HNS(pocq_addrhaz,			0x10),
+	CMN_EVENT_HNS(pocq_atomic_addrhaz,		0x11),
+	CMN_EVENT_HNS(ld_st_swp_adq_full,		0x12),
+	CMN_EVENT_HNS(cmp_adq_full,			0x13),
+	CMN_EVENT_HNS(txdat_stall,			0x14),
+	CMN_EVENT_HNS(txrsp_stall,			0x15),
+	CMN_EVENT_HNS(seq_full,				0x16),
+	CMN_EVENT_HNS(seq_hit,				0x17),
+	CMN_EVENT_HNS(snp_sent,				0x18),
+	CMN_EVENT_HNS(sfbi_dir_snp_sent,		0x19),
+	CMN_EVENT_HNS(sfbi_brd_snp_sent,		0x1a),
+	CMN_EVENT_HNS(intv_dirty,			0x1c),
+	CMN_EVENT_HNS(stash_snp_sent,			0x1d),
+	CMN_EVENT_HNS(stash_data_pull,			0x1e),
+	CMN_EVENT_HNS(snp_fwded,			0x1f),
+	CMN_EVENT_HNS(atomic_fwd,			0x20),
+	CMN_EVENT_HNS(mpam_hardlim,			0x21),
+	CMN_EVENT_HNS(mpam_softlim,			0x22),
+	CMN_EVENT_HNS(snp_sent_cluster,			0x23),
+	CMN_EVENT_HNS(sf_imprecise_evict,		0x24),
+	CMN_EVENT_HNS(sf_evict_shared_line,		0x25),
+	CMN_EVENT_HNS_CLS(pocq_class_occup,		0x26),
+	CMN_EVENT_HNS_CLS(pocq_class_retry,		0x27),
+	CMN_EVENT_HNS_CLS(class_mc_reqs_local,		0x28),
+	CMN_EVENT_HNS_CLS(class_cgnt_cmin,		0x29),
+	CMN_EVENT_HNS_SNT(sn_throttle,			0x2a),
+	CMN_EVENT_HNS_SNT(sn_throttle_min,		0x2b),
+	CMN_EVENT_HNS(sf_precise_to_imprecise,		0x2c),
+	CMN_EVENT_HNS(snp_intv_cln,			0x2d),
+	CMN_EVENT_HNS(nc_excl,				0x2e),
+	CMN_EVENT_HNS(excl_mon_ovfl,			0x2f),
+	CMN_EVENT_HNS(snp_req_recvd,			0x30),
+	CMN_EVENT_HNS(snp_req_byp_pocq,			0x31),
+	CMN_EVENT_HNS(dir_ccgha_snp_sent,		0x32),
+	CMN_EVENT_HNS(brd_ccgha_snp_sent,		0x33),
+	CMN_EVENT_HNS(ccgha_snp_stall,			0x34),
+	CMN_EVENT_HNS(lbt_req_hardlim,			0x35),
+	CMN_EVENT_HNS(hbt_req_hardlim,			0x36),
+	CMN_EVENT_HNS(sf_reupdate,			0x37),
+	CMN_EVENT_HNS(excl_sf_imprecise,		0x38),
+	CMN_EVENT_HNS(snp_pocq_addrhaz,			0x39),
+	CMN_EVENT_HNS(mc_retries_remote,		0x3a),
+	CMN_EVENT_HNS_SNH(mc_reqs_remote,		0x3b),
+	CMN_EVENT_HNS_CLS(class_mc_reqs_remote,		0x3c),
+
+	NULL
+};
+
+static const struct attribute_group arm_cmn_event_attrs_group = {
+	.name = "events",
+	.attrs = arm_cmn_event_attrs,
+	.is_visible = arm_cmn_event_attr_is_visible,
+};
+
+static ssize_t arm_cmn_format_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr);
+	int lo = __ffs(fmt->field), hi = __fls(fmt->field);
+
+	if (lo == hi)
+		return sysfs_emit(buf, "config:%d\n", lo);
+
+	if (!fmt->config)
+		return sysfs_emit(buf, "config:%d-%d\n", lo, hi);
+
+	return sysfs_emit(buf, "config%d:%d-%d\n", fmt->config, lo, hi);
+}
+
+#define _CMN_FORMAT_ATTR(_name, _cfg, _fld)				\
+	(&((struct arm_cmn_format_attr[]) {{				\
+		.attr = __ATTR(_name, 0444, arm_cmn_format_show, NULL),	\
+		.config = _cfg,						\
+		.field = _fld,						\
+	}})[0].attr.attr)
+#define CMN_FORMAT_ATTR(_name, _fld)	_CMN_FORMAT_ATTR(_name, 0, _fld)
+
+static struct attribute *arm_cmn_format_attrs[] = {
+	CMN_FORMAT_ATTR(type, CMN_CONFIG_TYPE),
+	CMN_FORMAT_ATTR(eventid, CMN_CONFIG_EVENTID),
+	CMN_FORMAT_ATTR(occupid, CMN_CONFIG_OCCUPID),
+	CMN_FORMAT_ATTR(bynodeid, CMN_CONFIG_BYNODEID),
+	CMN_FORMAT_ATTR(nodeid, CMN_CONFIG_NODEID),
+
+	CMN_FORMAT_ATTR(wp_dev_sel, CMN_CONFIG_WP_DEV_SEL),
+	CMN_FORMAT_ATTR(wp_chn_sel, CMN_CONFIG_WP_CHN_SEL),
+	CMN_FORMAT_ATTR(wp_grp, CMN_CONFIG_WP_GRP),
+	CMN_FORMAT_ATTR(wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE),
+	CMN_FORMAT_ATTR(wp_combine, CMN_CONFIG_WP_COMBINE),
+
+	_CMN_FORMAT_ATTR(wp_val, 1, CMN_CONFIG1_WP_VAL),
+	_CMN_FORMAT_ATTR(wp_mask, 2, CMN_CONFIG2_WP_MASK),
+
+	NULL
+};
+
+static const struct attribute_group arm_cmn_format_attrs_group = {
+	.name = "format",
+	.attrs = arm_cmn_format_attrs,
+};
+
+static ssize_t arm_cmn_cpumask_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(cmn->cpu));
+}
+
+static struct device_attribute arm_cmn_cpumask_attr =
+		__ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL);
+
+static ssize_t arm_cmn_identifier_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%03x%02x\n", cmn->part, cmn->rev);
+}
+
+static struct device_attribute arm_cmn_identifier_attr =
+		__ATTR(identifier, 0444, arm_cmn_identifier_show, NULL);
+
+static struct attribute *arm_cmn_other_attrs[] = {
+	&arm_cmn_cpumask_attr.attr,
+	&arm_cmn_identifier_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group arm_cmn_other_attrs_group = {
+	.attrs = arm_cmn_other_attrs,
+};
+
+static const struct attribute_group *arm_cmn_attr_groups[] = {
+	&arm_cmn_event_attrs_group,
+	&arm_cmn_format_attrs_group,
+	&arm_cmn_other_attrs_group,
+	NULL
+};
+
+static int arm_cmn_wp_idx(struct perf_event *event)
+{
+	return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
+}
+
+static u32 arm_cmn_wp_config(struct perf_event *event)
+{
+	u32 config;
+	u32 dev = CMN_EVENT_WP_DEV_SEL(event);
+	u32 chn = CMN_EVENT_WP_CHN_SEL(event);
+	u32 grp = CMN_EVENT_WP_GRP(event);
+	u32 exc = CMN_EVENT_WP_EXCLUSIVE(event);
+	u32 combine = CMN_EVENT_WP_COMBINE(event);
+	bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600;
+
+	config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
+		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
+		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
+		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1);
+	if (exc)
+		config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
+				      CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
+	if (combine && !grp)
+		config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE :
+				      CMN_DTM_WPn_CONFIG_WP_COMBINE;
+	return config;
+}
+
+static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state)
+{
+	if (!cmn->state)
+		writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR);
+	cmn->state |= state;
+}
+
+static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state)
+{
+	cmn->state &= ~state;
+	if (!cmn->state)
+		writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN,
+			       cmn->dtc[0].base + CMN_DT_PMCR);
+}
+
+static void arm_cmn_pmu_enable(struct pmu *pmu)
+{
+	arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_DISABLED);
+}
+
+static void arm_cmn_pmu_disable(struct pmu *pmu)
+{
+	arm_cmn_set_state(to_cmn(pmu), CMN_STATE_DISABLED);
+}
+
+static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw,
+			    bool snapshot)
+{
+	struct arm_cmn_dtm *dtm = NULL;
+	struct arm_cmn_node *dn;
+	unsigned int i, offset, dtm_idx;
+	u64 reg, count = 0;
+
+	offset = snapshot ? CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT;
+	for_each_hw_dn(hw, dn, i) {
+		if (dtm != &cmn->dtms[dn->dtm]) {
+			dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
+			reg = readq_relaxed(dtm->base + offset);
+		}
+		dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+		count += (u16)(reg >> (dtm_idx * 16));
+	}
+	return count;
+}
+
+static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc)
+{
+	u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR);
+
+	writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+	return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1);
+}
+
+static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx)
+{
+	u32 val, pmevcnt = CMN_DT_PMEVCNT(idx);
+
+	val = readl_relaxed(dtc->base + pmevcnt);
+	writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt);
+	return val - CMN_COUNTER_INIT;
+}
+
+static void arm_cmn_init_counter(struct perf_event *event)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
+	u64 count;
+
+	for (i = 0; hw->dtcs_used & (1U << i); i++) {
+		writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
+		cmn->dtc[i].counters[hw->dtc_idx] = event;
+	}
+
+	count = arm_cmn_read_dtm(cmn, hw, false);
+	local64_set(&event->hw.prev_count, count);
+}
+
+static void arm_cmn_event_read(struct perf_event *event)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	u64 delta, new, prev;
+	unsigned long flags;
+	unsigned int i;
+
+	if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) {
+		i = __ffs(hw->dtcs_used);
+		delta = arm_cmn_read_cc(cmn->dtc + i);
+		local64_add(delta, &event->count);
+		return;
+	}
+	new = arm_cmn_read_dtm(cmn, hw, false);
+	prev = local64_xchg(&event->hw.prev_count, new);
+
+	delta = new - prev;
+
+	local_irq_save(flags);
+	for (i = 0; hw->dtcs_used & (1U << i); i++) {
+		new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
+		delta += new << 16;
+	}
+	local_irq_restore(flags);
+	local64_add(delta, &event->count);
+}
+
+static int arm_cmn_set_event_sel_hi(struct arm_cmn_node *dn,
+				    enum cmn_filter_select fsel, u8 occupid)
+{
+	u64 reg;
+
+	if (fsel == SEL_NONE)
+		return 0;
+
+	if (!dn->occupid[fsel].count) {
+		dn->occupid[fsel].val = occupid;
+		reg = FIELD_PREP(CMN__PMU_CBUSY_SNTHROTTLE_SEL,
+				 dn->occupid[SEL_CBUSY_SNTHROTTLE_SEL].val) |
+		      FIELD_PREP(CMN__PMU_SN_HOME_SEL,
+				 dn->occupid[SEL_SN_HOME_SEL].val) |
+		      FIELD_PREP(CMN__PMU_HBT_LBT_SEL,
+				 dn->occupid[SEL_HBT_LBT_SEL].val) |
+		      FIELD_PREP(CMN__PMU_CLASS_OCCUP_ID,
+				 dn->occupid[SEL_CLASS_OCCUP_ID].val) |
+		      FIELD_PREP(CMN__PMU_OCCUP1_ID,
+				 dn->occupid[SEL_OCCUP1ID].val);
+		writel_relaxed(reg >> 32, dn->pmu_base + CMN_PMU_EVENT_SEL + 4);
+	} else if (dn->occupid[fsel].val != occupid) {
+		return -EBUSY;
+	}
+	dn->occupid[fsel].count++;
+	return 0;
+}
+
+static void arm_cmn_set_event_sel_lo(struct arm_cmn_node *dn, int dtm_idx,
+				     int eventid, bool wide_sel)
+{
+	if (wide_sel) {
+		dn->event_w[dtm_idx] = eventid;
+		writeq_relaxed(le64_to_cpu(dn->event_sel_w), dn->pmu_base + CMN_PMU_EVENT_SEL);
+	} else {
+		dn->event[dtm_idx] = eventid;
+		writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL);
+	}
+}
+
+static void arm_cmn_event_start(struct perf_event *event, int flags)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+	int i;
+
+	if (type == CMN_TYPE_DTC) {
+		i = __ffs(hw->dtcs_used);
+		writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
+		cmn->dtc[i].cc_active = true;
+	} else if (type == CMN_TYPE_WP) {
+		int wp_idx = arm_cmn_wp_idx(event);
+		u64 val = CMN_EVENT_WP_VAL(event);
+		u64 mask = CMN_EVENT_WP_MASK(event);
+
+		for_each_hw_dn(hw, dn, i) {
+			void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+
+			writeq_relaxed(val, base + CMN_DTM_WPn_VAL(wp_idx));
+			writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx));
+		}
+	} else for_each_hw_dn(hw, dn, i) {
+		int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+
+		arm_cmn_set_event_sel_lo(dn, dtm_idx, CMN_EVENT_EVENTID(event),
+					 hw->wide_sel);
+	}
+}
+
+static void arm_cmn_event_stop(struct perf_event *event, int flags)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+	int i;
+
+	if (type == CMN_TYPE_DTC) {
+		i = __ffs(hw->dtcs_used);
+		cmn->dtc[i].cc_active = false;
+	} else if (type == CMN_TYPE_WP) {
+		int wp_idx = arm_cmn_wp_idx(event);
+
+		for_each_hw_dn(hw, dn, i) {
+			void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset);
+
+			writeq_relaxed(0, base + CMN_DTM_WPn_MASK(wp_idx));
+			writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx));
+		}
+	} else for_each_hw_dn(hw, dn, i) {
+		int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+
+		arm_cmn_set_event_sel_lo(dn, dtm_idx, 0, hw->wide_sel);
+	}
+
+	arm_cmn_event_read(event);
+}
+
+struct arm_cmn_val {
+	u8 dtm_count[CMN_MAX_DTMS];
+	u8 occupid[CMN_MAX_DTMS][SEL_MAX];
+	u8 wp[CMN_MAX_DTMS][4];
+	int dtc_count;
+	bool cycles;
+};
+
+static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
+				  struct perf_event *event)
+{
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type;
+	int i;
+
+	if (is_software_event(event))
+		return;
+
+	type = CMN_EVENT_TYPE(event);
+	if (type == CMN_TYPE_DTC) {
+		val->cycles = true;
+		return;
+	}
+
+	val->dtc_count++;
+
+	for_each_hw_dn(hw, dn, i) {
+		int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
+
+		val->dtm_count[dtm]++;
+
+		if (sel > SEL_NONE)
+			val->occupid[dtm][sel] = CMN_EVENT_OCCUPID(event) + 1;
+
+		if (type != CMN_TYPE_WP)
+			continue;
+
+		wp_idx = arm_cmn_wp_idx(event);
+		val->wp[dtm][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1;
+	}
+}
+
+static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
+{
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	struct perf_event *sibling, *leader = event->group_leader;
+	enum cmn_node_type type;
+	struct arm_cmn_val *val;
+	int i, ret = -EINVAL;
+
+	if (leader == event)
+		return 0;
+
+	if (event->pmu != leader->pmu && !is_software_event(leader))
+		return -EINVAL;
+
+	val = kzalloc(sizeof(*val), GFP_KERNEL);
+	if (!val)
+		return -ENOMEM;
+
+	arm_cmn_val_add_event(cmn, val, leader);
+	for_each_sibling_event(sibling, leader)
+		arm_cmn_val_add_event(cmn, val, sibling);
+
+	type = CMN_EVENT_TYPE(event);
+	if (type == CMN_TYPE_DTC) {
+		ret = val->cycles ? -EINVAL : 0;
+		goto done;
+	}
+
+	if (val->dtc_count == CMN_DT_NUM_COUNTERS)
+		goto done;
+
+	for_each_hw_dn(hw, dn, i) {
+		int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
+
+		if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS)
+			goto done;
+
+		if (sel > SEL_NONE && val->occupid[dtm][sel] &&
+		    val->occupid[dtm][sel] != CMN_EVENT_OCCUPID(event) + 1)
+			goto done;
+
+		if (type != CMN_TYPE_WP)
+			continue;
+
+		wp_idx = arm_cmn_wp_idx(event);
+		if (val->wp[dtm][wp_idx])
+			goto done;
+
+		wp_cmb = val->wp[dtm][wp_idx ^ 1];
+		if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1)
+			goto done;
+	}
+
+	ret = 0;
+done:
+	kfree(val);
+	return ret;
+}
+
+static enum cmn_filter_select arm_cmn_filter_sel(const struct arm_cmn *cmn,
+						 enum cmn_node_type type,
+						 unsigned int eventid)
+{
+	struct arm_cmn_event_attr *e;
+	enum cmn_model model = arm_cmn_model(cmn);
+
+	for (int i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) {
+		e = container_of(arm_cmn_event_attrs[i], typeof(*e), attr.attr);
+		if (e->model & model && e->type == type && e->eventid == eventid)
+			return e->fsel;
+	}
+	return SEL_NONE;
+}
+
+
+static int arm_cmn_event_init(struct perf_event *event)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type;
+	bool bynodeid;
+	u16 nodeid, eventid;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	event->cpu = cmn->cpu;
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	type = CMN_EVENT_TYPE(event);
+	/* DTC events (i.e. cycles) already have everything they need */
+	if (type == CMN_TYPE_DTC)
+		return arm_cmn_validate_group(cmn, event);
+
+	eventid = CMN_EVENT_EVENTID(event);
+	/* For watchpoints we need the actual XP node here */
+	if (type == CMN_TYPE_WP) {
+		type = CMN_TYPE_XP;
+		/* ...and we need a "real" direction */
+		if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN)
+			return -EINVAL;
+		/* ...but the DTM may depend on which port we're watching */
+		if (cmn->multi_dtm)
+			hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
+	} else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) {
+		hw->wide_sel = true;
+	}
+
+	/* This is sufficiently annoying to recalculate, so cache it */
+	hw->filter_sel = arm_cmn_filter_sel(cmn, type, eventid);
+
+	bynodeid = CMN_EVENT_BYNODEID(event);
+	nodeid = CMN_EVENT_NODEID(event);
+
+	hw->dn = arm_cmn_node(cmn, type);
+	if (!hw->dn)
+		return -EINVAL;
+	for (dn = hw->dn; dn->type == type; dn++) {
+		if (bynodeid && dn->id != nodeid) {
+			hw->dn++;
+			continue;
+		}
+		hw->num_dns++;
+		if (bynodeid)
+			break;
+	}
+
+	if (!hw->num_dns) {
+		struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid);
+
+		dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
+			nodeid, nid.x, nid.y, nid.port, nid.dev, type);
+		return -EINVAL;
+	}
+	/*
+	 * Keep assuming non-cycles events count in all DTC domains; turns out
+	 * it's hard to make a worthwhile optimisation around this, short of
+	 * going all-in with domain-local counter allocation as well.
+	 */
+	hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
+
+	return arm_cmn_validate_group(cmn, event);
+}
+
+static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
+				int i)
+{
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+
+	while (i--) {
+		struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset;
+		unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+
+		if (type == CMN_TYPE_WP)
+			dtm->wp_event[arm_cmn_wp_idx(event)] = -1;
+
+		if (hw->filter_sel > SEL_NONE)
+			hw->dn[i].occupid[hw->filter_sel].count--;
+
+		dtm->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx);
+		writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
+	}
+	memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
+
+	for (i = 0; hw->dtcs_used & (1U << i); i++)
+		cmn->dtc[i].counters[hw->dtc_idx] = NULL;
+}
+
+static int arm_cmn_event_add(struct perf_event *event, int flags)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_dtc *dtc = &cmn->dtc[0];
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+	unsigned int i, dtc_idx, input_sel;
+
+	if (type == CMN_TYPE_DTC) {
+		i = 0;
+		while (cmn->dtc[i].cycles)
+			if (++i == cmn->num_dtcs)
+				return -ENOSPC;
+
+		cmn->dtc[i].cycles = event;
+		hw->dtc_idx = CMN_DT_NUM_COUNTERS;
+		hw->dtcs_used = 1U << i;
+
+		if (flags & PERF_EF_START)
+			arm_cmn_event_start(event, 0);
+		return 0;
+	}
+
+	/* Grab a free global counter first... */
+	dtc_idx = 0;
+	while (dtc->counters[dtc_idx])
+		if (++dtc_idx == CMN_DT_NUM_COUNTERS)
+			return -ENOSPC;
+
+	hw->dtc_idx = dtc_idx;
+
+	/* ...then the local counters to feed it. */
+	for_each_hw_dn(hw, dn, i) {
+		struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
+		unsigned int dtm_idx, shift;
+		u64 reg;
+
+		dtm_idx = 0;
+		while (dtm->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx))
+			if (++dtm_idx == CMN_DTM_NUM_COUNTERS)
+				goto free_dtms;
+
+		if (type == CMN_TYPE_XP) {
+			input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
+		} else if (type == CMN_TYPE_WP) {
+			int tmp, wp_idx = arm_cmn_wp_idx(event);
+			u32 cfg = arm_cmn_wp_config(event);
+
+			if (dtm->wp_event[wp_idx] >= 0)
+				goto free_dtms;
+
+			tmp = dtm->wp_event[wp_idx ^ 1];
+			if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
+					CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
+				goto free_dtms;
+
+			input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
+			dtm->wp_event[wp_idx] = dtc_idx;
+			writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
+		} else {
+			struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
+
+			if (cmn->multi_dtm)
+				nid.port %= 2;
+
+			input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx +
+				    (nid.port << 4) + (nid.dev << 2);
+
+			if (arm_cmn_set_event_sel_hi(dn, hw->filter_sel, CMN_EVENT_OCCUPID(event)))
+				goto free_dtms;
+		}
+
+		arm_cmn_set_index(hw->dtm_idx, i, dtm_idx);
+
+		dtm->input_sel[dtm_idx] = input_sel;
+		shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
+		dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
+		dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
+		dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
+		reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low;
+		writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG);
+	}
+
+	/* Go go go! */
+	arm_cmn_init_counter(event);
+
+	if (flags & PERF_EF_START)
+		arm_cmn_event_start(event, 0);
+
+	return 0;
+
+free_dtms:
+	arm_cmn_event_clear(cmn, event, i);
+	return -ENOSPC;
+}
+
+static void arm_cmn_event_del(struct perf_event *event, int flags)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+
+	arm_cmn_event_stop(event, PERF_EF_UPDATE);
+
+	if (type == CMN_TYPE_DTC)
+		cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL;
+	else
+		arm_cmn_event_clear(cmn, event, hw->num_dns);
+}
+
+/*
+ * We stop the PMU for both add and read, to avoid skew across DTM counters.
+ * In theory we could use snapshots to read without stopping, but then it
+ * becomes a lot trickier to deal with overlow and racing against interrupts,
+ * plus it seems they don't work properly on some hardware anyway :(
+ */
+static void arm_cmn_start_txn(struct pmu *pmu, unsigned int flags)
+{
+	arm_cmn_set_state(to_cmn(pmu), CMN_STATE_TXN);
+}
+
+static void arm_cmn_end_txn(struct pmu *pmu)
+{
+	arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_TXN);
+}
+
+static int arm_cmn_commit_txn(struct pmu *pmu)
+{
+	arm_cmn_end_txn(pmu);
+	return 0;
+}
+
+static void arm_cmn_migrate(struct arm_cmn *cmn, unsigned int cpu)
+{
+	unsigned int i;
+
+	perf_pmu_migrate_context(&cmn->pmu, cmn->cpu, cpu);
+	for (i = 0; i < cmn->num_dtcs; i++)
+		irq_set_affinity(cmn->dtc[i].irq, cpumask_of(cpu));
+	cmn->cpu = cpu;
+}
+
+static int arm_cmn_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+	struct arm_cmn *cmn;
+	int node;
+
+	cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
+	node = dev_to_node(cmn->dev);
+	if (node != NUMA_NO_NODE && cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node)
+		arm_cmn_migrate(cmn, cpu);
+	return 0;
+}
+
+static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+	struct arm_cmn *cmn;
+	unsigned int target;
+	int node;
+	cpumask_t mask;
+
+	cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
+	if (cpu != cmn->cpu)
+		return 0;
+
+	node = dev_to_node(cmn->dev);
+	if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
+	    cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+		target = cpumask_any(&mask);
+	else
+		target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target < nr_cpu_ids)
+		arm_cmn_migrate(cmn, target);
+	return 0;
+}
+
+static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
+{
+	struct arm_cmn_dtc *dtc = dev_id;
+	irqreturn_t ret = IRQ_NONE;
+
+	for (;;) {
+		u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR);
+		u64 delta;
+		int i;
+
+		for (i = 0; i < CMN_DT_NUM_COUNTERS; i++) {
+			if (status & (1U << i)) {
+				ret = IRQ_HANDLED;
+				if (WARN_ON(!dtc->counters[i]))
+					continue;
+				delta = (u64)arm_cmn_read_counter(dtc, i) << 16;
+				local64_add(delta, &dtc->counters[i]->count);
+			}
+		}
+
+		if (status & (1U << CMN_DT_NUM_COUNTERS)) {
+			ret = IRQ_HANDLED;
+			if (dtc->cc_active && !WARN_ON(!dtc->cycles)) {
+				delta = arm_cmn_read_cc(dtc);
+				local64_add(delta, &dtc->cycles->count);
+			}
+		}
+
+		writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR);
+
+		if (!dtc->irq_friend)
+			return ret;
+		dtc += dtc->irq_friend;
+	}
+}
+
+/* We can reasonably accommodate DTCs of the same CMN sharing IRQs */
+static int arm_cmn_init_irqs(struct arm_cmn *cmn)
+{
+	int i, j, irq, err;
+
+	for (i = 0; i < cmn->num_dtcs; i++) {
+		irq = cmn->dtc[i].irq;
+		for (j = i; j--; ) {
+			if (cmn->dtc[j].irq == irq) {
+				cmn->dtc[j].irq_friend = i - j;
+				goto next;
+			}
+		}
+		err = devm_request_irq(cmn->dev, irq, arm_cmn_handle_irq,
+				       IRQF_NOBALANCING | IRQF_NO_THREAD,
+				       dev_name(cmn->dev), &cmn->dtc[i]);
+		if (err)
+			return err;
+
+		err = irq_set_affinity(irq, cpumask_of(cmn->cpu));
+		if (err)
+			return err;
+	next:
+		; /* isn't C great? */
+	}
+	return 0;
+}
+
+static void arm_cmn_init_dtm(struct arm_cmn_dtm *dtm, struct arm_cmn_node *xp, int idx)
+{
+	int i;
+
+	dtm->base = xp->pmu_base + CMN_DTM_OFFSET(idx);
+	dtm->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN;
+	writeq_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
+	for (i = 0; i < 4; i++) {
+		dtm->wp_event[i] = -1;
+		writeq_relaxed(0, dtm->base + CMN_DTM_WPn_MASK(i));
+		writeq_relaxed(~0ULL, dtm->base + CMN_DTM_WPn_VAL(i));
+	}
+}
+
+static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int idx)
+{
+	struct arm_cmn_dtc *dtc = cmn->dtc + idx;
+
+	dtc->base = dn->pmu_base - CMN_PMU_OFFSET;
+	dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx);
+	if (dtc->irq < 0)
+		return dtc->irq;
+
+	writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL);
+	writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
+	writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR);
+	writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
+
+	return 0;
+}
+
+static int arm_cmn_node_cmp(const void *a, const void *b)
+{
+	const struct arm_cmn_node *dna = a, *dnb = b;
+	int cmp;
+
+	cmp = dna->type - dnb->type;
+	if (!cmp)
+		cmp = dna->logid - dnb->logid;
+	return cmp;
+}
+
+static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
+{
+	struct arm_cmn_node *dn, *xp;
+	int dtc_idx = 0;
+	u8 dtcs_present = (1 << cmn->num_dtcs) - 1;
+
+	cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
+	if (!cmn->dtc)
+		return -ENOMEM;
+
+	sort(cmn->dns, cmn->num_dns, sizeof(cmn->dns[0]), arm_cmn_node_cmp, NULL);
+
+	cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);
+
+	for (dn = cmn->dns; dn->type; dn++) {
+		if (dn->type == CMN_TYPE_XP) {
+			dn->dtc &= dtcs_present;
+			continue;
+		}
+
+		xp = arm_cmn_node_to_xp(cmn, dn);
+		dn->dtm = xp->dtm;
+		if (cmn->multi_dtm)
+			dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
+
+		if (dn->type == CMN_TYPE_DTC) {
+			int err;
+			/* We do at least know that a DTC's XP must be in that DTC's domain */
+			if (xp->dtc == 0xf)
+				xp->dtc = 1 << dtc_idx;
+			err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+			if (err)
+				return err;
+		}
+
+		/* To the PMU, RN-Ds don't add anything over RN-Is, so smoosh them together */
+		if (dn->type == CMN_TYPE_RND)
+			dn->type = CMN_TYPE_RNI;
+
+		/* We split the RN-I off already, so let the CCLA part match CCLA events */
+		if (dn->type == CMN_TYPE_CCLA_RNI)
+			dn->type = CMN_TYPE_CCLA;
+	}
+
+	arm_cmn_set_state(cmn, CMN_STATE_DISABLED);
+
+	return 0;
+}
+
+static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_region)
+{
+	int offset = CMN_DTM_UNIT_INFO;
+
+	if (cmn->part == PART_CMN650 || cmn->part == PART_CI700)
+		offset = CMN650_DTM_UNIT_INFO;
+
+	return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
+}
+
+static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
+{
+	int level;
+	u64 reg = readq_relaxed(cmn->base + offset + CMN_NODE_INFO);
+
+	node->type = FIELD_GET(CMN_NI_NODE_TYPE, reg);
+	node->id = FIELD_GET(CMN_NI_NODE_ID, reg);
+	node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg);
+
+	node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET;
+
+	if (node->type == CMN_TYPE_CFG)
+		level = 0;
+	else if (node->type == CMN_TYPE_XP)
+		level = 1;
+	else
+		level = 2;
+
+	dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6x id:%-4hd off:%#x\n",
+			(level * 2) + 1, ' ', node->id, 5 - (level * 2), ' ',
+			node->type, node->logid, offset);
+}
+
+static enum cmn_node_type arm_cmn_subtype(enum cmn_node_type type)
+{
+	switch (type) {
+	case CMN_TYPE_HNP:
+		return CMN_TYPE_HNI;
+	case CMN_TYPE_CCLA_RNI:
+		return CMN_TYPE_RNI;
+	default:
+		return CMN_TYPE_INVALID;
+	}
+}
+
+static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
+{
+	void __iomem *cfg_region;
+	struct arm_cmn_node cfg, *dn;
+	struct arm_cmn_dtm *dtm;
+	enum cmn_part part;
+	u16 child_count, child_poff;
+	u32 xp_offset[CMN_MAX_XPS];
+	u64 reg;
+	int i, j;
+	size_t sz;
+
+	arm_cmn_init_node_info(cmn, rgn_offset, &cfg);
+	if (cfg.type != CMN_TYPE_CFG)
+		return -ENODEV;
+
+	cfg_region = cmn->base + rgn_offset;
+
+	reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01);
+	part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg);
+	part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8;
+	if (cmn->part && cmn->part != part)
+		dev_warn(cmn->dev,
+			 "Firmware binding mismatch: expected part number 0x%x, found 0x%x\n",
+			 cmn->part, part);
+	cmn->part = part;
+	if (!arm_cmn_model(cmn))
+		dev_warn(cmn->dev, "Unknown part number: 0x%x\n", part);
+
+	reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23);
+	cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
+
+	reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL);
+	cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN;
+	cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg);
+	cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg);
+
+	reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL_1);
+	cmn->snp_vc_num = FIELD_GET(CMN_INFO_SNP_VC_NUM, reg);
+	cmn->req_vc_num = FIELD_GET(CMN_INFO_REQ_VC_NUM, reg);
+
+	reg = readq_relaxed(cfg_region + CMN_CHILD_INFO);
+	child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
+	child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg);
+
+	cmn->num_xps = child_count;
+	cmn->num_dns = cmn->num_xps;
+
+	/* Pass 1: visit the XPs, enumerate their children */
+	for (i = 0; i < cmn->num_xps; i++) {
+		reg = readq_relaxed(cfg_region + child_poff + i * 8);
+		xp_offset[i] = reg & CMN_CHILD_NODE_ADDR;
+
+		reg = readq_relaxed(cmn->base + xp_offset[i] + CMN_CHILD_INFO);
+		cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg);
+	}
+
+	/*
+	 * Some nodes effectively have two separate types, which we'll handle
+	 * by creating one of each internally. For a (very) safe initial upper
+	 * bound, account for double the number of non-XP nodes.
+	 */
+	dn = devm_kcalloc(cmn->dev, cmn->num_dns * 2 - cmn->num_xps,
+			  sizeof(*dn), GFP_KERNEL);
+	if (!dn)
+		return -ENOMEM;
+
+	/* Initial safe upper bound on DTMs for any possible mesh layout */
+	i = cmn->num_xps;
+	if (cmn->multi_dtm)
+		i += cmn->num_xps + 1;
+	dtm = devm_kcalloc(cmn->dev, i, sizeof(*dtm), GFP_KERNEL);
+	if (!dtm)
+		return -ENOMEM;
+
+	/* Pass 2: now we can actually populate the nodes */
+	cmn->dns = dn;
+	cmn->dtms = dtm;
+	for (i = 0; i < cmn->num_xps; i++) {
+		void __iomem *xp_region = cmn->base + xp_offset[i];
+		struct arm_cmn_node *xp = dn++;
+		unsigned int xp_ports = 0;
+
+		arm_cmn_init_node_info(cmn, xp_offset[i], xp);
+		/*
+		 * Thanks to the order in which XP logical IDs seem to be
+		 * assigned, we can handily infer the mesh X dimension by
+		 * looking out for the XP at (0,1) without needing to know
+		 * the exact node ID format, which we can later derive.
+		 */
+		if (xp->id == (1 << 3))
+			cmn->mesh_x = xp->logid;
+
+		if (cmn->part == PART_CMN600)
+			xp->dtc = 0xf;
+		else
+			xp->dtc = 1 << arm_cmn_dtc_domain(cmn, xp_region);
+
+		xp->dtm = dtm - cmn->dtms;
+		arm_cmn_init_dtm(dtm++, xp, 0);
+		/*
+		 * Keeping track of connected ports will let us filter out
+		 * unnecessary XP events easily. We can also reliably infer the
+		 * "extra device ports" configuration for the node ID format
+		 * from this, since in that case we will see at least one XP
+		 * with port 2 connected, for the HN-D.
+		 */
+		for (int p = 0; p < CMN_MAX_PORTS; p++)
+			if (arm_cmn_device_connect_info(cmn, xp, p))
+				xp_ports |= BIT(p);
+
+		if (cmn->multi_dtm && (xp_ports & 0xc))
+			arm_cmn_init_dtm(dtm++, xp, 1);
+		if (cmn->multi_dtm && (xp_ports & 0x30))
+			arm_cmn_init_dtm(dtm++, xp, 2);
+
+		cmn->ports_used |= xp_ports;
+
+		reg = readq_relaxed(xp_region + CMN_CHILD_INFO);
+		child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
+		child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg);
+
+		for (j = 0; j < child_count; j++) {
+			reg = readq_relaxed(xp_region + child_poff + j * 8);
+			/*
+			 * Don't even try to touch anything external, since in general
+			 * we haven't a clue how to power up arbitrary CHI requesters.
+			 * As of CMN-600r1 these could only be RN-SAMs or CXLAs,
+			 * neither of which have any PMU events anyway.
+			 * (Actually, CXLAs do seem to have grown some events in r1p2,
+			 * but they don't go to regular XP DTMs, and they depend on
+			 * secure configuration which we can't easily deal with)
+			 */
+			if (reg & CMN_CHILD_NODE_EXTERNAL) {
+				dev_dbg(cmn->dev, "ignoring external node %llx\n", reg);
+				continue;
+			}
+
+			arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn);
+
+			switch (dn->type) {
+			case CMN_TYPE_DTC:
+				cmn->num_dtcs++;
+				dn++;
+				break;
+			/* These guys have PMU events */
+			case CMN_TYPE_DVM:
+			case CMN_TYPE_HNI:
+			case CMN_TYPE_HNF:
+			case CMN_TYPE_SBSX:
+			case CMN_TYPE_RNI:
+			case CMN_TYPE_RND:
+			case CMN_TYPE_MTSX:
+			case CMN_TYPE_CXRA:
+			case CMN_TYPE_CXHA:
+			case CMN_TYPE_CCRA:
+			case CMN_TYPE_CCHA:
+			case CMN_TYPE_CCLA:
+			case CMN_TYPE_HNS:
+				dn++;
+				break;
+			/* Nothing to see here */
+			case CMN_TYPE_MPAM_S:
+			case CMN_TYPE_MPAM_NS:
+			case CMN_TYPE_RNSAM:
+			case CMN_TYPE_CXLA:
+			case CMN_TYPE_HNS_MPAM_S:
+			case CMN_TYPE_HNS_MPAM_NS:
+				break;
+			/*
+			 * Split "optimised" combination nodes into separate
+			 * types for the different event sets. Offsetting the
+			 * base address lets us handle the second pmu_event_sel
+			 * register via the normal mechanism later.
+			 */
+			case CMN_TYPE_HNP:
+			case CMN_TYPE_CCLA_RNI:
+				dn[1] = dn[0];
+				dn[0].pmu_base += CMN_HNP_PMU_EVENT_SEL;
+				dn[1].type = arm_cmn_subtype(dn->type);
+				dn += 2;
+				break;
+			/* Something has gone horribly wrong */
+			default:
+				dev_err(cmn->dev, "invalid device node type: 0x%x\n", dn->type);
+				return -ENODEV;
+			}
+		}
+	}
+
+	/* Correct for any nodes we added or skipped */
+	cmn->num_dns = dn - cmn->dns;
+
+	/* Cheeky +1 to help terminate pointer-based iteration later */
+	sz = (void *)(dn + 1) - (void *)cmn->dns;
+	dn = devm_krealloc(cmn->dev, cmn->dns, sz, GFP_KERNEL);
+	if (dn)
+		cmn->dns = dn;
+
+	sz = (void *)dtm - (void *)cmn->dtms;
+	dtm = devm_krealloc(cmn->dev, cmn->dtms, sz, GFP_KERNEL);
+	if (dtm)
+		cmn->dtms = dtm;
+
+	/*
+	 * If mesh_x wasn't set during discovery then we never saw
+	 * an XP at (0,1), thus we must have an Nx1 configuration.
+	 */
+	if (!cmn->mesh_x)
+		cmn->mesh_x = cmn->num_xps;
+	cmn->mesh_y = cmn->num_xps / cmn->mesh_x;
+
+	/* 1x1 config plays havoc with XP event encodings */
+	if (cmn->num_xps == 1)
+		dev_warn(cmn->dev, "1x1 config not fully supported, translate XP events manually\n");
+
+	dev_dbg(cmn->dev, "periph_id part 0x%03x revision %d\n", cmn->part, cmn->rev);
+	reg = cmn->ports_used;
+	dev_dbg(cmn->dev, "mesh %dx%d, ID width %d, ports %6pbl%s\n",
+		cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn), &reg,
+		cmn->multi_dtm ? ", multi-DTM" : "");
+
+	return 0;
+}
+
+static int arm_cmn600_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn)
+{
+	struct resource *cfg, *root;
+
+	cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!cfg)
+		return -EINVAL;
+
+	root = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!root)
+		return -EINVAL;
+
+	if (!resource_contains(cfg, root))
+		swap(cfg, root);
+	/*
+	 * Note that devm_ioremap_resource() is dumb and won't let the platform
+	 * device claim cfg when the ACPI companion device has already claimed
+	 * root within it. But since they *are* already both claimed in the
+	 * appropriate name, we don't really need to do it again here anyway.
+	 */
+	cmn->base = devm_ioremap(cmn->dev, cfg->start, resource_size(cfg));
+	if (!cmn->base)
+		return -ENOMEM;
+
+	return root->start - cfg->start;
+}
+
+static int arm_cmn600_of_probe(struct device_node *np)
+{
+	u32 rootnode;
+
+	return of_property_read_u32(np, "arm,root-node", &rootnode) ?: rootnode;
+}
+
+static int arm_cmn_probe(struct platform_device *pdev)
+{
+	struct arm_cmn *cmn;
+	const char *name;
+	static atomic_t id;
+	int err, rootnode, this_id;
+
+	cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL);
+	if (!cmn)
+		return -ENOMEM;
+
+	cmn->dev = &pdev->dev;
+	cmn->part = (unsigned long)device_get_match_data(cmn->dev);
+	platform_set_drvdata(pdev, cmn);
+
+	if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) {
+		rootnode = arm_cmn600_acpi_probe(pdev, cmn);
+	} else {
+		rootnode = 0;
+		cmn->base = devm_platform_ioremap_resource(pdev, 0);
+		if (IS_ERR(cmn->base))
+			return PTR_ERR(cmn->base);
+		if (cmn->part == PART_CMN600)
+			rootnode = arm_cmn600_of_probe(pdev->dev.of_node);
+	}
+	if (rootnode < 0)
+		return rootnode;
+
+	err = arm_cmn_discover(cmn, rootnode);
+	if (err)
+		return err;
+
+	err = arm_cmn_init_dtcs(cmn);
+	if (err)
+		return err;
+
+	err = arm_cmn_init_irqs(cmn);
+	if (err)
+		return err;
+
+	cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev));
+	cmn->pmu = (struct pmu) {
+		.module = THIS_MODULE,
+		.attr_groups = arm_cmn_attr_groups,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+		.task_ctx_nr = perf_invalid_context,
+		.pmu_enable = arm_cmn_pmu_enable,
+		.pmu_disable = arm_cmn_pmu_disable,
+		.event_init = arm_cmn_event_init,
+		.add = arm_cmn_event_add,
+		.del = arm_cmn_event_del,
+		.start = arm_cmn_event_start,
+		.stop = arm_cmn_event_stop,
+		.read = arm_cmn_event_read,
+		.start_txn = arm_cmn_start_txn,
+		.commit_txn = arm_cmn_commit_txn,
+		.cancel_txn = arm_cmn_end_txn,
+	};
+
+	this_id = atomic_fetch_inc(&id);
+	name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id);
+	if (!name)
+		return -ENOMEM;
+
+	err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
+	if (err)
+		return err;
+
+	err = perf_pmu_register(&cmn->pmu, name, -1);
+	if (err)
+		cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
+	else
+		arm_cmn_debugfs_init(cmn, this_id);
+
+	return err;
+}
+
+static int arm_cmn_remove(struct platform_device *pdev)
+{
+	struct arm_cmn *cmn = platform_get_drvdata(pdev);
+
+	writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL);
+
+	perf_pmu_unregister(&cmn->pmu);
+	cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node);
+	debugfs_remove(cmn->debug);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id arm_cmn_of_match[] = {
+	{ .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 },
+	{ .compatible = "arm,cmn-650" },
+	{ .compatible = "arm,cmn-700" },
+	{ .compatible = "arm,ci-700" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id arm_cmn_acpi_match[] = {
+	{ "ARMHC600", PART_CMN600 },
+	{ "ARMHC650" },
+	{ "ARMHC700" },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
+#endif
+
+static struct platform_driver arm_cmn_driver = {
+	.driver = {
+		.name = "arm-cmn",
+		.of_match_table = of_match_ptr(arm_cmn_of_match),
+		.acpi_match_table = ACPI_PTR(arm_cmn_acpi_match),
+	},
+	.probe = arm_cmn_probe,
+	.remove = arm_cmn_remove,
+};
+
+static int __init arm_cmn_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/arm/cmn:online",
+				      arm_cmn_pmu_online_cpu,
+				      arm_cmn_pmu_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	arm_cmn_hp_state = ret;
+	arm_cmn_debugfs = debugfs_create_dir("arm-cmn", NULL);
+
+	ret = platform_driver_register(&arm_cmn_driver);
+	if (ret) {
+		cpuhp_remove_multi_state(arm_cmn_hp_state);
+		debugfs_remove(arm_cmn_debugfs);
+	}
+	return ret;
+}
+
+static void __exit arm_cmn_exit(void)
+{
+	platform_driver_unregister(&arm_cmn_driver);
+	cpuhp_remove_multi_state(arm_cmn_hp_state);
+	debugfs_remove(arm_cmn_debugfs);
+}
+
+module_init(arm_cmn_init);
+module_exit(arm_cmn_exit);
+
+MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
+MODULE_DESCRIPTION("Arm CMN-600 PMU driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
new file mode 100644
index 0000000000..25d25ded09
--- /dev/null
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+	tristate "ARM Coresight Architecture PMU"
+	depends on ARM64 || COMPILE_TEST
+	help
+	  Provides support for performance monitoring unit (PMU) devices
+	  based on ARM CoreSight PMU architecture. Note that this PMU
+	  architecture does not have relationship with the ARM CoreSight
+	  Self-Hosted Tracing.
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
new file mode 100644
index 0000000000..fedb17df98
--- /dev/null
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -0,0 +1,6 @@
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
+arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
new file mode 100644
index 0000000000..9363c31f31
--- /dev/null
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -0,0 +1,1309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM CoreSight Architecture PMU driver.
+ *
+ * This driver adds support for uncore PMU based on ARM CoreSight Performance
+ * Monitoring Unit Architecture. The PMU is accessible via MMIO registers and
+ * like other uncore PMUs, it does not support process specific events and
+ * cannot be used in sampling mode.
+ *
+ * This code is based on other uncore PMUs like ARM DSU PMU. It provides a
+ * generic implementation to operate the PMU according to CoreSight PMU
+ * architecture and ACPI ARM PMU table (APMT) documents below:
+ *   - ARM CoreSight PMU architecture document number: ARM IHI 0091 A.a-00bet0.
+ *   - APMT document number: ARM DEN0117.
+ *
+ * The user should refer to the vendor technical documentation to get details
+ * about the supported events.
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
+#include <linux/ctype.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+#include "arm_cspmu.h"
+#include "nvidia_cspmu.h"
+
+#define PMUNAME "arm_cspmu"
+#define DRVNAME "arm-cs-arch-pmu"
+
+#define ARM_CSPMU_CPUMASK_ATTR(_name, _config)			\
+	ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_cpumask_show,	\
+				(unsigned long)_config)
+
+/*
+ * CoreSight PMU Arch register offsets.
+ */
+#define PMEVCNTR_LO					0x0
+#define PMEVCNTR_HI					0x4
+#define PMEVTYPER					0x400
+#define PMCCFILTR					0x47C
+#define PMEVFILTR					0xA00
+#define PMCNTENSET					0xC00
+#define PMCNTENCLR					0xC20
+#define PMINTENSET					0xC40
+#define PMINTENCLR					0xC60
+#define PMOVSCLR					0xC80
+#define PMOVSSET					0xCC0
+#define PMCFGR						0xE00
+#define PMCR						0xE04
+#define PMIIDR						0xE08
+
+/* PMCFGR register field */
+#define PMCFGR_NCG					GENMASK(31, 28)
+#define PMCFGR_HDBG					BIT(24)
+#define PMCFGR_TRO					BIT(23)
+#define PMCFGR_SS					BIT(22)
+#define PMCFGR_FZO					BIT(21)
+#define PMCFGR_MSI					BIT(20)
+#define PMCFGR_UEN					BIT(19)
+#define PMCFGR_NA					BIT(17)
+#define PMCFGR_EX					BIT(16)
+#define PMCFGR_CCD					BIT(15)
+#define PMCFGR_CC					BIT(14)
+#define PMCFGR_SIZE					GENMASK(13, 8)
+#define PMCFGR_N					GENMASK(7, 0)
+
+/* PMCR register field */
+#define PMCR_TRO					BIT(11)
+#define PMCR_HDBG					BIT(10)
+#define PMCR_FZO					BIT(9)
+#define PMCR_NA						BIT(8)
+#define PMCR_DP						BIT(5)
+#define PMCR_X						BIT(4)
+#define PMCR_D						BIT(3)
+#define PMCR_C						BIT(2)
+#define PMCR_P						BIT(1)
+#define PMCR_E						BIT(0)
+
+/* Each SET/CLR register supports up to 32 counters. */
+#define ARM_CSPMU_SET_CLR_COUNTER_SHIFT		5
+#define ARM_CSPMU_SET_CLR_COUNTER_NUM		\
+	(1 << ARM_CSPMU_SET_CLR_COUNTER_SHIFT)
+
+/* Convert counter idx into SET/CLR register number. */
+#define COUNTER_TO_SET_CLR_ID(idx)			\
+	(idx >> ARM_CSPMU_SET_CLR_COUNTER_SHIFT)
+
+/* Convert counter idx into SET/CLR register bit. */
+#define COUNTER_TO_SET_CLR_BIT(idx)			\
+	(idx & (ARM_CSPMU_SET_CLR_COUNTER_NUM - 1))
+
+#define ARM_CSPMU_ACTIVE_CPU_MASK		0x0
+#define ARM_CSPMU_ASSOCIATED_CPU_MASK		0x1
+
+/* Check and use default if implementer doesn't provide attribute callback */
+#define CHECK_DEFAULT_IMPL_OPS(ops, callback)			\
+	do {							\
+		if (!ops->callback)				\
+			ops->callback = arm_cspmu_ ## callback;	\
+	} while (0)
+
+/*
+ * Maximum poll count for reading counter value using high-low-high sequence.
+ */
+#define HILOHI_MAX_POLL	1000
+
+/* JEDEC-assigned JEP106 identification code */
+#define ARM_CSPMU_IMPL_ID_NVIDIA		0x36B
+
+static unsigned long arm_cspmu_cpuhp_state;
+
+static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
+{
+	return *(struct acpi_apmt_node **)dev_get_platdata(dev);
+}
+
+/*
+ * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
+ * counter register. The counter register can be implemented as 32-bit or 64-bit
+ * register depending on the value of PMCFGR.SIZE field. For 64-bit access,
+ * single-copy 64-bit atomic support is implementation defined. APMT node flag
+ * is used to identify if the PMU supports 64-bit single copy atomic. If 64-bit
+ * single copy atomic is not supported, the driver treats the register as a pair
+ * of 32-bit register.
+ */
+
+/*
+ * Read 64-bit register as a pair of 32-bit registers using hi-lo-hi sequence.
+ */
+static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count)
+{
+	u32 val_lo, val_hi;
+	u64 val;
+
+	/* Use high-low-high sequence to avoid tearing */
+	do {
+		if (max_poll_count-- == 0) {
+			pr_err("ARM CSPMU: timeout hi-low-high sequence\n");
+			return 0;
+		}
+
+		val_hi = readl(addr + 4);
+		val_lo = readl(addr);
+	} while (val_hi != readl(addr + 4));
+
+	val = (((u64)val_hi << 32) | val_lo);
+
+	return val;
+}
+
+/* Check if cycle counter is supported. */
+static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu)
+{
+	return (cspmu->pmcfgr & PMCFGR_CC);
+}
+
+/* Get counter size, which is (PMCFGR_SIZE + 1). */
+static inline u32 counter_size(const struct arm_cspmu *cspmu)
+{
+	return FIELD_GET(PMCFGR_SIZE, cspmu->pmcfgr) + 1;
+}
+
+/* Get counter mask. */
+static inline u64 counter_mask(const struct arm_cspmu *cspmu)
+{
+	return GENMASK_ULL(counter_size(cspmu) - 1, 0);
+}
+
+/* Check if counter is implemented as 64-bit register. */
+static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu)
+{
+	return (counter_size(cspmu) > 32);
+}
+
+ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, typeof(*pmu_attr), attr);
+	return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show);
+
+/* Default event list. */
+static struct attribute *arm_cspmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute **
+arm_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+	struct attribute **attrs;
+
+	attrs = devm_kmemdup(cspmu->dev, arm_cspmu_event_attrs,
+		sizeof(arm_cspmu_event_attrs), GFP_KERNEL);
+
+	return attrs;
+}
+
+static umode_t
+arm_cspmu_event_attr_is_visible(struct kobject *kobj,
+				struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev));
+	struct perf_pmu_events_attr *eattr;
+
+	eattr = container_of(attr, typeof(*eattr), attr.attr);
+
+	/* Hide cycle event if not supported */
+	if (!supports_cycle_counter(cspmu) &&
+	    eattr->id == ARM_CSPMU_EVT_CYCLES_DEFAULT)
+		return 0;
+
+	return attr->mode;
+}
+
+ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct dev_ext_attribute *eattr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_format_show);
+
+static struct attribute *arm_cspmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_FILTER_ATTR,
+	NULL,
+};
+
+static struct attribute **
+arm_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+	struct attribute **attrs;
+
+	attrs = devm_kmemdup(cspmu->dev, arm_cspmu_format_attrs,
+		sizeof(arm_cspmu_format_attrs), GFP_KERNEL);
+
+	return attrs;
+}
+
+static u32 arm_cspmu_event_type(const struct perf_event *event)
+{
+	return event->attr.config & ARM_CSPMU_EVENT_MASK;
+}
+
+static bool arm_cspmu_is_cycle_counter_event(const struct perf_event *event)
+{
+	return (event->attr.config == ARM_CSPMU_EVT_CYCLES_DEFAULT);
+}
+
+static u32 arm_cspmu_event_filter(const struct perf_event *event)
+{
+	return event->attr.config1 & ARM_CSPMU_FILTER_MASK;
+}
+
+static ssize_t arm_cspmu_identifier_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *page)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(page, "%s\n", cspmu->identifier);
+}
+
+static struct device_attribute arm_cspmu_identifier_attr =
+	__ATTR(identifier, 0444, arm_cspmu_identifier_show, NULL);
+
+static struct attribute *arm_cspmu_identifier_attrs[] = {
+	&arm_cspmu_identifier_attr.attr,
+	NULL,
+};
+
+static struct attribute_group arm_cspmu_identifier_attr_group = {
+	.attrs = arm_cspmu_identifier_attrs,
+};
+
+static const char *arm_cspmu_get_identifier(const struct arm_cspmu *cspmu)
+{
+	const char *identifier =
+		devm_kasprintf(cspmu->dev, GFP_KERNEL, "%x",
+			       cspmu->impl.pmiidr);
+	return identifier;
+}
+
+static const char *arm_cspmu_type_str[ACPI_APMT_NODE_TYPE_COUNT] = {
+	"mc",
+	"smmu",
+	"pcie",
+	"acpi",
+	"cache",
+};
+
+static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+	struct device *dev;
+	struct acpi_apmt_node *apmt_node;
+	u8 pmu_type;
+	char *name;
+	char acpi_hid_string[ACPI_ID_LEN] = { 0 };
+	static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 };
+
+	dev = cspmu->dev;
+	apmt_node = arm_cspmu_apmt_node(dev);
+	pmu_type = apmt_node->type;
+
+	if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) {
+		dev_err(dev, "unsupported PMU type-%u\n", pmu_type);
+		return NULL;
+	}
+
+	if (pmu_type == ACPI_APMT_NODE_TYPE_ACPI) {
+		memcpy(acpi_hid_string,
+			&apmt_node->inst_primary,
+			sizeof(apmt_node->inst_primary));
+		name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%s_%u", PMUNAME,
+				      arm_cspmu_type_str[pmu_type],
+				      acpi_hid_string,
+				      apmt_node->inst_secondary);
+	} else {
+		name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%d", PMUNAME,
+				      arm_cspmu_type_str[pmu_type],
+				      atomic_fetch_inc(&pmu_idx[pmu_type]));
+	}
+
+	return name;
+}
+
+static ssize_t arm_cspmu_cpumask_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
+	struct dev_ext_attribute *eattr =
+		container_of(attr, struct dev_ext_attribute, attr);
+	unsigned long mask_id = (unsigned long)eattr->var;
+	const cpumask_t *cpumask;
+
+	switch (mask_id) {
+	case ARM_CSPMU_ACTIVE_CPU_MASK:
+		cpumask = &cspmu->active_cpu;
+		break;
+	case ARM_CSPMU_ASSOCIATED_CPU_MASK:
+		cpumask = &cspmu->associated_cpus;
+		break;
+	default:
+		return 0;
+	}
+	return cpumap_print_to_pagebuf(true, buf, cpumask);
+}
+
+static struct attribute *arm_cspmu_cpumask_attrs[] = {
+	ARM_CSPMU_CPUMASK_ATTR(cpumask, ARM_CSPMU_ACTIVE_CPU_MASK),
+	ARM_CSPMU_CPUMASK_ATTR(associated_cpus, ARM_CSPMU_ASSOCIATED_CPU_MASK),
+	NULL,
+};
+
+static struct attribute_group arm_cspmu_cpumask_attr_group = {
+	.attrs = arm_cspmu_cpumask_attrs,
+};
+
+struct impl_match {
+	u32 pmiidr;
+	u32 mask;
+	int (*impl_init_ops)(struct arm_cspmu *cspmu);
+};
+
+static const struct impl_match impl_match[] = {
+	{
+	  .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
+	  .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+	  .impl_init_ops = nv_cspmu_init_ops
+	},
+	{}
+};
+
+static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
+{
+	int ret;
+	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev);
+	const struct impl_match *match = impl_match;
+
+	/*
+	 * Get PMU implementer and product id from APMT node.
+	 * If APMT node doesn't have implementer/product id, try get it
+	 * from PMIIDR.
+	 */
+	cspmu->impl.pmiidr =
+		(apmt_node->impl_id) ? apmt_node->impl_id :
+				       readl(cspmu->base0 + PMIIDR);
+
+	/* Find implementer specific attribute ops. */
+	for (; match->pmiidr; match++) {
+		const u32 mask = match->mask;
+
+		if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
+			ret = match->impl_init_ops(cspmu);
+			if (ret)
+				return ret;
+
+			break;
+		}
+	}
+
+	/* Use default callbacks if implementer doesn't provide one. */
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_format_attrs);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_identifier);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, get_name);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, is_cycle_counter_event);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter);
+	CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible);
+
+	return 0;
+}
+
+static struct attribute_group *
+arm_cspmu_alloc_event_attr_group(struct arm_cspmu *cspmu)
+{
+	struct attribute_group *event_group;
+	struct device *dev = cspmu->dev;
+	const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+
+	event_group =
+		devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL);
+	if (!event_group)
+		return NULL;
+
+	event_group->name = "events";
+	event_group->is_visible = impl_ops->event_attr_is_visible;
+	event_group->attrs = impl_ops->get_event_attrs(cspmu);
+
+	if (!event_group->attrs)
+		return NULL;
+
+	return event_group;
+}
+
+static struct attribute_group *
+arm_cspmu_alloc_format_attr_group(struct arm_cspmu *cspmu)
+{
+	struct attribute_group *format_group;
+	struct device *dev = cspmu->dev;
+
+	format_group =
+		devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL);
+	if (!format_group)
+		return NULL;
+
+	format_group->name = "format";
+	format_group->attrs = cspmu->impl.ops.get_format_attrs(cspmu);
+
+	if (!format_group->attrs)
+		return NULL;
+
+	return format_group;
+}
+
+static struct attribute_group **
+arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
+{
+	struct attribute_group **attr_groups = NULL;
+	struct device *dev = cspmu->dev;
+	const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	int ret;
+
+	ret = arm_cspmu_init_impl_ops(cspmu);
+	if (ret)
+		return NULL;
+
+	cspmu->identifier = impl_ops->get_identifier(cspmu);
+	cspmu->name = impl_ops->get_name(cspmu);
+
+	if (!cspmu->identifier || !cspmu->name)
+		return NULL;
+
+	attr_groups = devm_kcalloc(dev, 5, sizeof(struct attribute_group *),
+				   GFP_KERNEL);
+	if (!attr_groups)
+		return NULL;
+
+	attr_groups[0] = arm_cspmu_alloc_event_attr_group(cspmu);
+	attr_groups[1] = arm_cspmu_alloc_format_attr_group(cspmu);
+	attr_groups[2] = &arm_cspmu_identifier_attr_group;
+	attr_groups[3] = &arm_cspmu_cpumask_attr_group;
+
+	if (!attr_groups[0] || !attr_groups[1])
+		return NULL;
+
+	return attr_groups;
+}
+
+static inline void arm_cspmu_reset_counters(struct arm_cspmu *cspmu)
+{
+	u32 pmcr = 0;
+
+	pmcr |= PMCR_P;
+	pmcr |= PMCR_C;
+	writel(pmcr, cspmu->base0 + PMCR);
+}
+
+static inline void arm_cspmu_start_counters(struct arm_cspmu *cspmu)
+{
+	writel(PMCR_E, cspmu->base0 + PMCR);
+}
+
+static inline void arm_cspmu_stop_counters(struct arm_cspmu *cspmu)
+{
+	writel(0, cspmu->base0 + PMCR);
+}
+
+static void arm_cspmu_enable(struct pmu *pmu)
+{
+	bool disabled;
+	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
+
+	disabled = bitmap_empty(cspmu->hw_events.used_ctrs,
+				cspmu->num_logical_ctrs);
+
+	if (disabled)
+		return;
+
+	arm_cspmu_start_counters(cspmu);
+}
+
+static void arm_cspmu_disable(struct pmu *pmu)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
+
+	arm_cspmu_stop_counters(cspmu);
+}
+
+static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
+				struct perf_event *event)
+{
+	int idx;
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+
+	if (supports_cycle_counter(cspmu)) {
+		if (cspmu->impl.ops.is_cycle_counter_event(event)) {
+			/* Search for available cycle counter. */
+			if (test_and_set_bit(cspmu->cycle_counter_logical_idx,
+					     hw_events->used_ctrs))
+				return -EAGAIN;
+
+			return cspmu->cycle_counter_logical_idx;
+		}
+
+		/*
+		 * Search a regular counter from the used counter bitmap.
+		 * The cycle counter divides the bitmap into two parts. Search
+		 * the first then second half to exclude the cycle counter bit.
+		 */
+		idx = find_first_zero_bit(hw_events->used_ctrs,
+					  cspmu->cycle_counter_logical_idx);
+		if (idx >= cspmu->cycle_counter_logical_idx) {
+			idx = find_next_zero_bit(
+				hw_events->used_ctrs,
+				cspmu->num_logical_ctrs,
+				cspmu->cycle_counter_logical_idx + 1);
+		}
+	} else {
+		idx = find_first_zero_bit(hw_events->used_ctrs,
+					  cspmu->num_logical_ctrs);
+	}
+
+	if (idx >= cspmu->num_logical_ctrs)
+		return -EAGAIN;
+
+	set_bit(idx, hw_events->used_ctrs);
+
+	return idx;
+}
+
+static bool arm_cspmu_validate_event(struct pmu *pmu,
+				 struct arm_cspmu_hw_events *hw_events,
+				 struct perf_event *event)
+{
+	if (is_software_event(event))
+		return true;
+
+	/* Reject groups spanning multiple HW PMUs. */
+	if (event->pmu != pmu)
+		return false;
+
+	return (arm_cspmu_get_event_idx(hw_events, event) >= 0);
+}
+
+/*
+ * Make sure the group of events can be scheduled at once
+ * on the PMU.
+ */
+static bool arm_cspmu_validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct arm_cspmu_hw_events fake_hw_events;
+
+	if (event->group_leader == event)
+		return true;
+
+	memset(&fake_hw_events, 0, sizeof(fake_hw_events));
+
+	if (!arm_cspmu_validate_event(event->pmu, &fake_hw_events, leader))
+		return false;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!arm_cspmu_validate_event(event->pmu, &fake_hw_events,
+						  sibling))
+			return false;
+	}
+
+	return arm_cspmu_validate_event(event->pmu, &fake_hw_events, event);
+}
+
+static int arm_cspmu_event_init(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu;
+	struct hw_perf_event *hwc = &event->hw;
+
+	cspmu = to_arm_cspmu(event->pmu);
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * Following other "uncore" PMUs, we do not support sampling mode or
+	 * attach to a task (per-process mode).
+	 */
+	if (is_sampling_event(event)) {
+		dev_dbg(cspmu->pmu.dev,
+			"Can't support sampling events\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
+		dev_dbg(cspmu->pmu.dev,
+			"Can't support per-task counters\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Make sure the CPU assignment is on one of the CPUs associated with
+	 * this PMU.
+	 */
+	if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
+		dev_dbg(cspmu->pmu.dev,
+			"Requested cpu is not associated with the PMU\n");
+		return -EINVAL;
+	}
+
+	/* Enforce the current active CPU to handle the events in this PMU. */
+	event->cpu = cpumask_first(&cspmu->active_cpu);
+	if (event->cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	if (!arm_cspmu_validate_group(event))
+		return -EINVAL;
+
+	/*
+	 * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
+	 * The physical counter id is tracked with hw_perf_event.idx.
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet.
+	 */
+	hwc->idx = -1;
+	hwc->extra_reg.idx = -1;
+	hwc->config = cspmu->impl.ops.event_type(event);
+
+	return 0;
+}
+
+static inline u32 counter_offset(u32 reg_sz, u32 ctr_idx)
+{
+	return (PMEVCNTR_LO + (reg_sz * ctr_idx));
+}
+
+static void arm_cspmu_write_counter(struct perf_event *event, u64 val)
+{
+	u32 offset;
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+
+	if (use_64b_counter_reg(cspmu)) {
+		offset = counter_offset(sizeof(u64), event->hw.idx);
+
+		writeq(val, cspmu->base1 + offset);
+	} else {
+		offset = counter_offset(sizeof(u32), event->hw.idx);
+
+		writel(lower_32_bits(val), cspmu->base1 + offset);
+	}
+}
+
+static u64 arm_cspmu_read_counter(struct perf_event *event)
+{
+	u32 offset;
+	const void __iomem *counter_addr;
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+
+	if (use_64b_counter_reg(cspmu)) {
+		offset = counter_offset(sizeof(u64), event->hw.idx);
+		counter_addr = cspmu->base1 + offset;
+
+		return cspmu->has_atomic_dword ?
+			       readq(counter_addr) :
+			       read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL);
+	}
+
+	offset = counter_offset(sizeof(u32), event->hw.idx);
+	return readl(cspmu->base1 + offset);
+}
+
+/*
+ * arm_cspmu_set_event_period: Set the period for the counter.
+ *
+ * To handle cases of extreme interrupt latency, we program
+ * the counter with half of the max count for the counters.
+ */
+static void arm_cspmu_set_event_period(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	u64 val = counter_mask(cspmu) >> 1ULL;
+
+	local64_set(&event->hw.prev_count, val);
+	arm_cspmu_write_counter(event, val);
+}
+
+static void arm_cspmu_enable_counter(struct arm_cspmu *cspmu, int idx)
+{
+	u32 reg_id, reg_bit, inten_off, cnten_off;
+
+	reg_id = COUNTER_TO_SET_CLR_ID(idx);
+	reg_bit = COUNTER_TO_SET_CLR_BIT(idx);
+
+	inten_off = PMINTENSET + (4 * reg_id);
+	cnten_off = PMCNTENSET + (4 * reg_id);
+
+	writel(BIT(reg_bit), cspmu->base0 + inten_off);
+	writel(BIT(reg_bit), cspmu->base0 + cnten_off);
+}
+
+static void arm_cspmu_disable_counter(struct arm_cspmu *cspmu, int idx)
+{
+	u32 reg_id, reg_bit, inten_off, cnten_off;
+
+	reg_id = COUNTER_TO_SET_CLR_ID(idx);
+	reg_bit = COUNTER_TO_SET_CLR_BIT(idx);
+
+	inten_off = PMINTENCLR + (4 * reg_id);
+	cnten_off = PMCNTENCLR + (4 * reg_id);
+
+	writel(BIT(reg_bit), cspmu->base0 + cnten_off);
+	writel(BIT(reg_bit), cspmu->base0 + inten_off);
+}
+
+static void arm_cspmu_event_update(struct perf_event *event)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev, now;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = arm_cspmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	delta = (now - prev) & counter_mask(cspmu);
+	local64_add(delta, &event->count);
+}
+
+static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
+					struct hw_perf_event *hwc)
+{
+	u32 offset = PMEVTYPER + (4 * hwc->idx);
+
+	writel(hwc->config, cspmu->base0 + offset);
+}
+
+static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+					   struct hw_perf_event *hwc,
+					   u32 filter)
+{
+	u32 offset = PMEVFILTR + (4 * hwc->idx);
+
+	writel(filter, cspmu->base0 + offset);
+}
+
+static inline void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, u32 filter)
+{
+	u32 offset = PMCCFILTR;
+
+	writel(filter, cspmu->base0 + offset);
+}
+
+static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 filter;
+
+	/* We always reprogram the counter */
+	if (pmu_flags & PERF_EF_RELOAD)
+		WARN_ON(!(hwc->state & PERF_HES_UPTODATE));
+
+	arm_cspmu_set_event_period(event);
+
+	filter = cspmu->impl.ops.event_filter(event);
+
+	if (event->hw.extra_reg.idx == cspmu->cycle_counter_logical_idx) {
+		arm_cspmu_set_cc_filter(cspmu, filter);
+	} else {
+		arm_cspmu_set_event(cspmu, hwc);
+		arm_cspmu_set_ev_filter(cspmu, hwc, filter);
+	}
+
+	hwc->state = 0;
+
+	arm_cspmu_enable_counter(cspmu, hwc->idx);
+}
+
+static void arm_cspmu_stop(struct perf_event *event, int pmu_flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	arm_cspmu_disable_counter(cspmu, hwc->idx);
+	arm_cspmu_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static inline u32 to_phys_idx(struct arm_cspmu *cspmu, u32 idx)
+{
+	return (idx == cspmu->cycle_counter_logical_idx) ?
+		ARM_CSPMU_CYCLE_CNTR_IDX : idx;
+}
+
+static int arm_cspmu_add(struct perf_event *event, int flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
+					   &cspmu->associated_cpus)))
+		return -ENOENT;
+
+	idx = arm_cspmu_get_event_idx(hw_events, event);
+	if (idx < 0)
+		return idx;
+
+	hw_events->events[idx] = event;
+	hwc->idx = to_phys_idx(cspmu, idx);
+	hwc->extra_reg.idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		arm_cspmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void arm_cspmu_del(struct perf_event *event, int flags)
+{
+	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
+	struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->extra_reg.idx;
+
+	arm_cspmu_stop(event, PERF_EF_UPDATE);
+
+	hw_events->events[idx] = NULL;
+
+	clear_bit(idx, hw_events->used_ctrs);
+
+	perf_event_update_userpage(event);
+}
+
+static void arm_cspmu_read(struct perf_event *event)
+{
+	arm_cspmu_event_update(event);
+}
+
+static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev)
+{
+	struct acpi_apmt_node *apmt_node;
+	struct arm_cspmu *cspmu;
+	struct device *dev = &pdev->dev;
+
+	cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL);
+	if (!cspmu)
+		return NULL;
+
+	cspmu->dev = dev;
+	platform_set_drvdata(pdev, cspmu);
+
+	apmt_node = arm_cspmu_apmt_node(dev);
+	cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC;
+
+	return cspmu;
+}
+
+static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu)
+{
+	struct device *dev;
+	struct platform_device *pdev;
+
+	dev = cspmu->dev;
+	pdev = to_platform_device(dev);
+
+	/* Base address for page 0. */
+	cspmu->base0 = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(cspmu->base0)) {
+		dev_err(dev, "ioremap failed for page-0 resource\n");
+		return PTR_ERR(cspmu->base0);
+	}
+
+	/* Base address for page 1 if supported. Otherwise point to page 0. */
+	cspmu->base1 = cspmu->base0;
+	if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) {
+		cspmu->base1 = devm_platform_ioremap_resource(pdev, 1);
+		if (IS_ERR(cspmu->base1)) {
+			dev_err(dev, "ioremap failed for page-1 resource\n");
+			return PTR_ERR(cspmu->base1);
+		}
+	}
+
+	cspmu->pmcfgr = readl(cspmu->base0 + PMCFGR);
+
+	cspmu->num_logical_ctrs = FIELD_GET(PMCFGR_N, cspmu->pmcfgr) + 1;
+
+	cspmu->cycle_counter_logical_idx = ARM_CSPMU_MAX_HW_CNTRS;
+
+	if (supports_cycle_counter(cspmu)) {
+		/*
+		 * The last logical counter is mapped to cycle counter if
+		 * there is a gap between regular and cycle counter. Otherwise,
+		 * logical and physical have 1-to-1 mapping.
+		 */
+		cspmu->cycle_counter_logical_idx =
+			(cspmu->num_logical_ctrs <= ARM_CSPMU_CYCLE_CNTR_IDX) ?
+				cspmu->num_logical_ctrs - 1 :
+				ARM_CSPMU_CYCLE_CNTR_IDX;
+	}
+
+	cspmu->num_set_clr_reg =
+		DIV_ROUND_UP(cspmu->num_logical_ctrs,
+				ARM_CSPMU_SET_CLR_COUNTER_NUM);
+
+	cspmu->hw_events.events =
+		devm_kcalloc(dev, cspmu->num_logical_ctrs,
+			     sizeof(*cspmu->hw_events.events), GFP_KERNEL);
+
+	if (!cspmu->hw_events.events)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static inline int arm_cspmu_get_reset_overflow(struct arm_cspmu *cspmu,
+					       u32 *pmovs)
+{
+	int i;
+	u32 pmovclr_offset = PMOVSCLR;
+	u32 has_overflowed = 0;
+
+	for (i = 0; i < cspmu->num_set_clr_reg; ++i) {
+		pmovs[i] = readl(cspmu->base1 + pmovclr_offset);
+		has_overflowed |= pmovs[i];
+		writel(pmovs[i], cspmu->base1 + pmovclr_offset);
+		pmovclr_offset += sizeof(u32);
+	}
+
+	return has_overflowed != 0;
+}
+
+static irqreturn_t arm_cspmu_handle_irq(int irq_num, void *dev)
+{
+	int idx, has_overflowed;
+	struct perf_event *event;
+	struct arm_cspmu *cspmu = dev;
+	DECLARE_BITMAP(pmovs, ARM_CSPMU_MAX_HW_CNTRS);
+	bool handled = false;
+
+	arm_cspmu_stop_counters(cspmu);
+
+	has_overflowed = arm_cspmu_get_reset_overflow(cspmu, (u32 *)pmovs);
+	if (!has_overflowed)
+		goto done;
+
+	for_each_set_bit(idx, cspmu->hw_events.used_ctrs,
+			cspmu->num_logical_ctrs) {
+		event = cspmu->hw_events.events[idx];
+
+		if (!event)
+			continue;
+
+		if (!test_bit(event->hw.idx, pmovs))
+			continue;
+
+		arm_cspmu_event_update(event);
+		arm_cspmu_set_event_period(event);
+
+		handled = true;
+	}
+
+done:
+	arm_cspmu_start_counters(cspmu);
+	return IRQ_RETVAL(handled);
+}
+
+static int arm_cspmu_request_irq(struct arm_cspmu *cspmu)
+{
+	int irq, ret;
+	struct device *dev;
+	struct platform_device *pdev;
+
+	dev = cspmu->dev;
+	pdev = to_platform_device(dev);
+
+	/* Skip IRQ request if the PMU does not support overflow interrupt. */
+	irq = platform_get_irq_optional(pdev, 0);
+	if (irq < 0)
+		return irq == -ENXIO ? 0 : irq;
+
+	ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(dev),
+			       cspmu);
+	if (ret) {
+		dev_err(dev, "Could not request IRQ %d\n", irq);
+		return ret;
+	}
+
+	cspmu->irq = irq;
+
+	return 0;
+}
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
+#include <acpi/processor.h>
+
+static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid)
+{
+	u32 acpi_uid;
+	struct device *cpu_dev;
+	struct acpi_device *acpi_dev;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return -ENODEV;
+
+	acpi_dev = ACPI_COMPANION(cpu_dev);
+	while (acpi_dev) {
+		if (!strcmp(acpi_device_hid(acpi_dev),
+			    ACPI_PROCESSOR_CONTAINER_HID) &&
+		    !kstrtouint(acpi_device_uid(acpi_dev), 0, &acpi_uid) &&
+		    acpi_uid == container_uid)
+			return 0;
+
+		acpi_dev = acpi_dev_parent(acpi_dev);
+	}
+
+	return -ENODEV;
+}
+
+static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
+{
+	struct acpi_apmt_node *apmt_node;
+	int affinity_flag;
+	int cpu;
+
+	apmt_node = arm_cspmu_apmt_node(cspmu->dev);
+	affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY;
+
+	if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) {
+		for_each_possible_cpu(cpu) {
+			if (apmt_node->proc_affinity ==
+			    get_acpi_id_for_cpu(cpu)) {
+				cpumask_set_cpu(cpu, &cspmu->associated_cpus);
+				break;
+			}
+		}
+	} else {
+		for_each_possible_cpu(cpu) {
+			if (arm_cspmu_find_cpu_container(
+				    cpu, apmt_node->proc_affinity))
+				continue;
+
+			cpumask_set_cpu(cpu, &cspmu->associated_cpus);
+		}
+	}
+
+	if (cpumask_empty(&cspmu->associated_cpus)) {
+		dev_dbg(cspmu->dev, "No cpu associated with the PMU\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+#else
+static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
+{
+	return -ENODEV;
+}
+#endif
+
+static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
+{
+	return arm_cspmu_acpi_get_cpus(cspmu);
+}
+
+static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
+{
+	int ret, capabilities;
+	struct attribute_group **attr_groups;
+
+	attr_groups = arm_cspmu_alloc_attr_group(cspmu);
+	if (!attr_groups)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(arm_cspmu_cpuhp_state,
+				       &cspmu->cpuhp_node);
+	if (ret)
+		return ret;
+
+	capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+	if (cspmu->irq == 0)
+		capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	cspmu->pmu = (struct pmu){
+		.task_ctx_nr	= perf_invalid_context,
+		.module		= THIS_MODULE,
+		.pmu_enable	= arm_cspmu_enable,
+		.pmu_disable	= arm_cspmu_disable,
+		.event_init	= arm_cspmu_event_init,
+		.add		= arm_cspmu_add,
+		.del		= arm_cspmu_del,
+		.start		= arm_cspmu_start,
+		.stop		= arm_cspmu_stop,
+		.read		= arm_cspmu_read,
+		.attr_groups	= (const struct attribute_group **)attr_groups,
+		.capabilities	= capabilities,
+	};
+
+	/* Hardware counter init */
+	arm_cspmu_stop_counters(cspmu);
+	arm_cspmu_reset_counters(cspmu);
+
+	ret = perf_pmu_register(&cspmu->pmu, cspmu->name, -1);
+	if (ret) {
+		cpuhp_state_remove_instance(arm_cspmu_cpuhp_state,
+					    &cspmu->cpuhp_node);
+	}
+
+	return ret;
+}
+
+static int arm_cspmu_device_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct arm_cspmu *cspmu;
+
+	cspmu = arm_cspmu_alloc(pdev);
+	if (!cspmu)
+		return -ENOMEM;
+
+	ret = arm_cspmu_init_mmio(cspmu);
+	if (ret)
+		return ret;
+
+	ret = arm_cspmu_request_irq(cspmu);
+	if (ret)
+		return ret;
+
+	ret = arm_cspmu_get_cpus(cspmu);
+	if (ret)
+		return ret;
+
+	ret = arm_cspmu_register_pmu(cspmu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int arm_cspmu_device_remove(struct platform_device *pdev)
+{
+	struct arm_cspmu *cspmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&cspmu->pmu);
+	cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
+
+	return 0;
+}
+
+static const struct platform_device_id arm_cspmu_id[] = {
+	{DRVNAME, 0},
+	{ },
+};
+MODULE_DEVICE_TABLE(platform, arm_cspmu_id);
+
+static struct platform_driver arm_cspmu_driver = {
+	.driver = {
+			.name = DRVNAME,
+			.suppress_bind_attrs = true,
+		},
+	.probe = arm_cspmu_device_probe,
+	.remove = arm_cspmu_device_remove,
+	.id_table = arm_cspmu_id,
+};
+
+static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu)
+{
+	cpumask_set_cpu(cpu, &cspmu->active_cpu);
+	if (cspmu->irq)
+		WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu));
+}
+
+static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_cspmu *cspmu =
+		hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
+
+	if (!cpumask_test_cpu(cpu, &cspmu->associated_cpus))
+		return 0;
+
+	/* If the PMU is already managed, there is nothing to do */
+	if (!cpumask_empty(&cspmu->active_cpu))
+		return 0;
+
+	/* Use this CPU for event counting */
+	arm_cspmu_set_active_cpu(cpu, cspmu);
+
+	return 0;
+}
+
+static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+	int dst;
+	struct cpumask online_supported;
+
+	struct arm_cspmu *cspmu =
+		hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
+
+	/* Nothing to do if this CPU doesn't own the PMU */
+	if (!cpumask_test_and_clear_cpu(cpu, &cspmu->active_cpu))
+		return 0;
+
+	/* Choose a new CPU to migrate ownership of the PMU to */
+	cpumask_and(&online_supported, &cspmu->associated_cpus,
+		    cpu_online_mask);
+	dst = cpumask_any_but(&online_supported, cpu);
+	if (dst >= nr_cpu_ids)
+		return 0;
+
+	/* Use this CPU for event counting */
+	perf_pmu_migrate_context(&cspmu->pmu, cpu, dst);
+	arm_cspmu_set_active_cpu(dst, cspmu);
+
+	return 0;
+}
+
+static int __init arm_cspmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+					"perf/arm/cspmu:online",
+					arm_cspmu_cpu_online,
+					arm_cspmu_cpu_teardown);
+	if (ret < 0)
+		return ret;
+	arm_cspmu_cpuhp_state = ret;
+	return platform_driver_register(&arm_cspmu_driver);
+}
+
+static void __exit arm_cspmu_exit(void)
+{
+	platform_driver_unregister(&arm_cspmu_driver);
+	cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
+}
+
+module_init(arm_cspmu_init);
+module_exit(arm_cspmu_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
new file mode 100644
index 0000000000..83df53d1c1
--- /dev/null
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * ARM CoreSight Architecture PMU driver.
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+#ifndef __ARM_CSPMU_H__
+#define __ARM_CSPMU_H__
+
+#include <linux/bitfield.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#define to_arm_cspmu(p) (container_of(p, struct arm_cspmu, pmu))
+
+#define ARM_CSPMU_EXT_ATTR(_name, _func, _config)			\
+	(&((struct dev_ext_attribute[]){				\
+		{							\
+			.attr = __ATTR(_name, 0444, _func, NULL),	\
+			.var = (void *)_config				\
+		}							\
+	})[0].attr.attr)
+
+#define ARM_CSPMU_FORMAT_ATTR(_name, _config)				\
+	ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_sysfs_format_show, (char *)_config)
+
+#define ARM_CSPMU_EVENT_ATTR(_name, _config)				\
+	PMU_EVENT_ATTR_ID(_name, arm_cspmu_sysfs_event_show, _config)
+
+
+/* Default event id mask */
+#define ARM_CSPMU_EVENT_MASK	GENMASK_ULL(63, 0)
+
+/* Default filter value mask */
+#define ARM_CSPMU_FILTER_MASK	GENMASK_ULL(63, 0)
+
+/* Default event format */
+#define ARM_CSPMU_FORMAT_EVENT_ATTR	\
+	ARM_CSPMU_FORMAT_ATTR(event, "config:0-32")
+
+/* Default filter format */
+#define ARM_CSPMU_FORMAT_FILTER_ATTR	\
+	ARM_CSPMU_FORMAT_ATTR(filter, "config1:0-31")
+
+/*
+ * This is the default event number for cycle count, if supported, since the
+ * ARM Coresight PMU specification does not define a standard event code
+ * for cycle count.
+ */
+#define ARM_CSPMU_EVT_CYCLES_DEFAULT	(0x1ULL << 32)
+
+/*
+ * The ARM Coresight PMU supports up to 256 event counters.
+ * If the counters are larger-than 32-bits, then the PMU includes at
+ * most 128 counters.
+ */
+#define ARM_CSPMU_MAX_HW_CNTRS		256
+
+/* The cycle counter, if implemented, is located at counter[31]. */
+#define ARM_CSPMU_CYCLE_CNTR_IDX	31
+
+/* PMIIDR register field */
+#define ARM_CSPMU_PMIIDR_IMPLEMENTER	GENMASK(11, 0)
+#define ARM_CSPMU_PMIIDR_PRODUCTID	GENMASK(31, 20)
+
+struct arm_cspmu;
+
+/* This tracks the events assigned to each counter in the PMU. */
+struct arm_cspmu_hw_events {
+	/* The events that are active on the PMU for a given logical index. */
+	struct perf_event **events;
+
+	/*
+	 * Each bit indicates a logical counter is being used (or not) for an
+	 * event. If cycle counter is supported and there is a gap between
+	 * regular and cycle counter, the last logical counter is mapped to
+	 * cycle counter. Otherwise, logical and physical have 1-to-1 mapping.
+	 */
+	DECLARE_BITMAP(used_ctrs, ARM_CSPMU_MAX_HW_CNTRS);
+};
+
+/* Contains ops to query vendor/implementer specific attribute. */
+struct arm_cspmu_impl_ops {
+	/* Get event attributes */
+	struct attribute **(*get_event_attrs)(const struct arm_cspmu *cspmu);
+	/* Get format attributes */
+	struct attribute **(*get_format_attrs)(const struct arm_cspmu *cspmu);
+	/* Get string identifier */
+	const char *(*get_identifier)(const struct arm_cspmu *cspmu);
+	/* Get PMU name to register to core perf */
+	const char *(*get_name)(const struct arm_cspmu *cspmu);
+	/* Check if the event corresponds to cycle count event */
+	bool (*is_cycle_counter_event)(const struct perf_event *event);
+	/* Decode event type/id from configs */
+	u32 (*event_type)(const struct perf_event *event);
+	/* Decode filter value from configs */
+	u32 (*event_filter)(const struct perf_event *event);
+	/* Hide/show unsupported events */
+	umode_t (*event_attr_is_visible)(struct kobject *kobj,
+					 struct attribute *attr, int unused);
+};
+
+/* Vendor/implementer descriptor. */
+struct arm_cspmu_impl {
+	u32 pmiidr;
+	struct arm_cspmu_impl_ops ops;
+	void *ctx;
+};
+
+/* Coresight PMU descriptor. */
+struct arm_cspmu {
+	struct pmu pmu;
+	struct device *dev;
+	const char *name;
+	const char *identifier;
+	void __iomem *base0;
+	void __iomem *base1;
+	cpumask_t associated_cpus;
+	cpumask_t active_cpu;
+	struct hlist_node cpuhp_node;
+	int irq;
+
+	bool has_atomic_dword;
+	u32 pmcfgr;
+	u32 num_logical_ctrs;
+	u32 num_set_clr_reg;
+	int cycle_counter_logical_idx;
+
+	struct arm_cspmu_hw_events hw_events;
+
+	struct arm_cspmu_impl impl;
+};
+
+/* Default function to show event attribute in sysfs. */
+ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf);
+
+/* Default function to show format attribute in sysfs. */
+ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf);
+
+#endif /* __ARM_CSPMU_H__ */
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
new file mode 100644
index 0000000000..72ef80caa3
--- /dev/null
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+/* Support for NVIDIA specific attributes. */
+
+#include <linux/topology.h>
+
+#include "nvidia_cspmu.h"
+
+#define NV_PCIE_PORT_COUNT           10ULL
+#define NV_PCIE_FILTER_ID_MASK       GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
+
+#define NV_NVL_C2C_PORT_COUNT        2ULL
+#define NV_NVL_C2C_FILTER_ID_MASK    GENMASK_ULL(NV_NVL_C2C_PORT_COUNT - 1, 0)
+
+#define NV_CNVL_PORT_COUNT           4ULL
+#define NV_CNVL_FILTER_ID_MASK       GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
+
+#define NV_GENERIC_FILTER_ID_MASK    GENMASK_ULL(31, 0)
+
+#define NV_PRODID_MASK               GENMASK(31, 0)
+
+#define NV_FORMAT_NAME_GENERIC	0
+
+#define to_nv_cspmu_ctx(cspmu)	((struct nv_cspmu_ctx *)(cspmu->impl.ctx))
+
+#define NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _num, _suff, _config)	\
+	ARM_CSPMU_EVENT_ATTR(_pref##_num##_suff, _config)
+
+#define NV_CSPMU_EVENT_ATTR_4(_pref, _suff, _config)			\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _0_, _suff, _config),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _1_, _suff, _config + 1),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _2_, _suff, _config + 2),	\
+	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _3_, _suff, _config + 3)
+
+struct nv_cspmu_ctx {
+	const char *name;
+	u32 filter_mask;
+	u32 filter_default_val;
+	struct attribute **event_attr;
+	struct attribute **format_attr;
+};
+
+static struct attribute *scf_pmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(bus_cycles,			0x1d),
+
+	ARM_CSPMU_EVENT_ATTR(scf_cache_allocate,		0xF0),
+	ARM_CSPMU_EVENT_ATTR(scf_cache_refill,			0xF1),
+	ARM_CSPMU_EVENT_ATTR(scf_cache,				0xF2),
+	ARM_CSPMU_EVENT_ATTR(scf_cache_wb,			0xF3),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_data,			0x101),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp,			0x105),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_data,			0x109),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp,			0x10d),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_data,			0x111),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding,		0x115),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding,		0x119),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding,		0x11d),
+	NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding,		0x121),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding,		0x125),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding,		0x129),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, rd_access,		0x12d),
+	NV_CSPMU_EVENT_ATTR_4(socket, dl_access,		0x131),
+	NV_CSPMU_EVENT_ATTR_4(socket, wb_access,		0x135),
+	NV_CSPMU_EVENT_ATTR_4(socket, wr_access,		0x139),
+	NV_CSPMU_EVENT_ATTR_4(socket, ev_access,		0x13d),
+	NV_CSPMU_EVENT_ATTR_4(socket, prb_access,		0x141),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_data,		0x145),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access,		0x149),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access,		0x14d),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding,		0x151),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding,		0x155),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data,			0x159),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access,		0x15d),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access,		0x161),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding,		0x165),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding,		0x169),
+
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_data,			0x16d),
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_access,			0x16e),
+	ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding,		0x16f),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp,			0x170),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_access,			0x171),
+	ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding,		0x172),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_data,			0x173),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_access,			0x174),
+	ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding,		0x175),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp,			0x176),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_access,			0x177),
+	ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding,		0x178),
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_data,			0x179),
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding,		0x17a),
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_access,			0x17b),
+
+	NV_CSPMU_EVENT_ATTR_4(socket, wr_data,			0x17c),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data,		0x180),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data,		0x184),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access,		0x188),
+	NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding,		0x18c),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data,			0x190),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data,			0x194),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access,		0x198),
+	NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding,		0x19c),
+
+	ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes,		0x1a0),
+	ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes,	0x1a1),
+	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data,		0x1a2),
+	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_outstanding,	0x1a3),
+	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_access,		0x1a4),
+
+	ARM_CSPMU_EVENT_ATTR(cmem_rd_data,			0x1a5),
+	ARM_CSPMU_EVENT_ATTR(cmem_rd_access,			0x1a6),
+	ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding,		0x1a7),
+	ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp,			0x1a8),
+	ARM_CSPMU_EVENT_ATTR(cmem_dl_access,			0x1a9),
+	ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding,		0x1aa),
+	ARM_CSPMU_EVENT_ATTR(cmem_wb_data,			0x1ab),
+	ARM_CSPMU_EVENT_ATTR(cmem_wb_access,			0x1ac),
+	ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding,		0x1ad),
+	ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp,			0x1ae),
+	ARM_CSPMU_EVENT_ATTR(cmem_ev_access,			0x1af),
+	ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding,		0x1b0),
+	ARM_CSPMU_EVENT_ATTR(cmem_wr_data,			0x1b1),
+	ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding,		0x1b2),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data,		0x1b3),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access,		0x1b7),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access,		0x1bb),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding,		0x1bf),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding,		0x1c3),
+
+	ARM_CSPMU_EVENT_ATTR(ocu_prb_access,			0x1c7),
+	ARM_CSPMU_EVENT_ATTR(ocu_prb_data,			0x1c8),
+	ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding,		0x1c9),
+
+	ARM_CSPMU_EVENT_ATTR(cmem_wr_access,			0x1ca),
+
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access,		0x1cb),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data,		0x1cf),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data,		0x1d3),
+	NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding,		0x1d7),
+
+	ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes,		0x1db),
+
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute *mcf_pmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(rd_bytes_loc,			0x0),
+	ARM_CSPMU_EVENT_ATTR(rd_bytes_rem,			0x1),
+	ARM_CSPMU_EVENT_ATTR(wr_bytes_loc,			0x2),
+	ARM_CSPMU_EVENT_ATTR(wr_bytes_rem,			0x3),
+	ARM_CSPMU_EVENT_ATTR(total_bytes_loc,			0x4),
+	ARM_CSPMU_EVENT_ATTR(total_bytes_rem,			0x5),
+	ARM_CSPMU_EVENT_ATTR(rd_req_loc,			0x6),
+	ARM_CSPMU_EVENT_ATTR(rd_req_rem,			0x7),
+	ARM_CSPMU_EVENT_ATTR(wr_req_loc,			0x8),
+	ARM_CSPMU_EVENT_ATTR(wr_req_rem,			0x9),
+	ARM_CSPMU_EVENT_ATTR(total_req_loc,			0xa),
+	ARM_CSPMU_EVENT_ATTR(total_req_rem,			0xb),
+	ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc,			0xc),
+	ARM_CSPMU_EVENT_ATTR(rd_cum_outs_rem,			0xd),
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute *generic_pmu_event_attrs[] = {
+	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+	NULL,
+};
+
+static struct attribute *scf_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	NULL,
+};
+
+static struct attribute *pcie_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_ATTR(root_port, "config1:0-9"),
+	NULL,
+};
+
+static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	NULL,
+};
+
+static struct attribute *cnvlink_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_ATTR(rem_socket, "config1:0-3"),
+	NULL,
+};
+
+static struct attribute *generic_pmu_format_attrs[] = {
+	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_FILTER_ATTR,
+	NULL,
+};
+
+static struct attribute **
+nv_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
+
+	return ctx->event_attr;
+}
+
+static struct attribute **
+nv_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
+
+	return ctx->format_attr;
+}
+
+static const char *
+nv_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
+
+	return ctx->name;
+}
+
+static u32 nv_cspmu_event_filter(const struct perf_event *event)
+{
+	const struct nv_cspmu_ctx *ctx =
+		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
+
+	if (ctx->filter_mask == 0)
+		return ctx->filter_default_val;
+
+	return event->attr.config1 & ctx->filter_mask;
+}
+
+enum nv_cspmu_name_fmt {
+	NAME_FMT_GENERIC,
+	NAME_FMT_SOCKET
+};
+
+struct nv_cspmu_match {
+	u32 prodid;
+	u32 prodid_mask;
+	u64 filter_mask;
+	u32 filter_default_val;
+	const char *name_pattern;
+	enum nv_cspmu_name_fmt name_fmt;
+	struct attribute **event_attr;
+	struct attribute **format_attr;
+};
+
+static const struct nv_cspmu_match nv_cspmu_match[] = {
+	{
+	  .prodid = 0x103,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = NV_PCIE_FILTER_ID_MASK,
+	  .filter_default_val = NV_PCIE_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_pcie_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = mcf_pmu_event_attrs,
+	  .format_attr = pcie_pmu_format_attrs
+	},
+	{
+	  .prodid = 0x104,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = 0x0,
+	  .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = mcf_pmu_event_attrs,
+	  .format_attr = nvlink_c2c_pmu_format_attrs
+	},
+	{
+	  .prodid = 0x105,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = 0x0,
+	  .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = mcf_pmu_event_attrs,
+	  .format_attr = nvlink_c2c_pmu_format_attrs
+	},
+	{
+	  .prodid = 0x106,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = NV_CNVL_FILTER_ID_MASK,
+	  .filter_default_val = NV_CNVL_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_cnvlink_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = mcf_pmu_event_attrs,
+	  .format_attr = cnvlink_pmu_format_attrs
+	},
+	{
+	  .prodid = 0x2CF,
+	  .prodid_mask = NV_PRODID_MASK,
+	  .filter_mask = 0x0,
+	  .filter_default_val = 0x0,
+	  .name_pattern = "nvidia_scf_pmu_%u",
+	  .name_fmt = NAME_FMT_SOCKET,
+	  .event_attr = scf_pmu_event_attrs,
+	  .format_attr = scf_pmu_format_attrs
+	},
+	{
+	  .prodid = 0,
+	  .prodid_mask = 0,
+	  .filter_mask = NV_GENERIC_FILTER_ID_MASK,
+	  .filter_default_val = NV_GENERIC_FILTER_ID_MASK,
+	  .name_pattern = "nvidia_uncore_pmu_%u",
+	  .name_fmt = NAME_FMT_GENERIC,
+	  .event_attr = generic_pmu_event_attrs,
+	  .format_attr = generic_pmu_format_attrs
+	},
+};
+
+static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
+				  const struct nv_cspmu_match *match)
+{
+	char *name;
+	struct device *dev = cspmu->dev;
+
+	static atomic_t pmu_generic_idx = {0};
+
+	switch (match->name_fmt) {
+	case NAME_FMT_SOCKET: {
+		const int cpu = cpumask_first(&cspmu->associated_cpus);
+		const int socket = cpu_to_node(cpu);
+
+		name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
+				       socket);
+		break;
+	}
+	case NAME_FMT_GENERIC:
+		name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
+				       atomic_fetch_inc(&pmu_generic_idx));
+		break;
+	default:
+		name = NULL;
+		break;
+	}
+
+	return name;
+}
+
+int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+	u32 prodid;
+	struct nv_cspmu_ctx *ctx;
+	struct device *dev = cspmu->dev;
+	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+	const struct nv_cspmu_match *match = nv_cspmu_match;
+
+	ctx = devm_kzalloc(dev, sizeof(struct nv_cspmu_ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr);
+
+	/* Find matching PMU. */
+	for (; match->prodid; match++) {
+		const u32 prodid_mask = match->prodid_mask;
+
+		if ((match->prodid & prodid_mask) == (prodid & prodid_mask))
+			break;
+	}
+
+	ctx->name		= nv_cspmu_format_name(cspmu, match);
+	ctx->filter_mask	= match->filter_mask;
+	ctx->filter_default_val = match->filter_default_val;
+	ctx->event_attr		= match->event_attr;
+	ctx->format_attr	= match->format_attr;
+
+	cspmu->impl.ctx = ctx;
+
+	/* NVIDIA specific callbacks. */
+	impl_ops->event_filter			= nv_cspmu_event_filter;
+	impl_ops->get_event_attrs		= nv_cspmu_get_event_attrs;
+	impl_ops->get_format_attrs		= nv_cspmu_get_format_attrs;
+	impl_ops->get_name			= nv_cspmu_get_name;
+
+	/* Set others to NULL to use default callback. */
+	impl_ops->event_type			= NULL;
+	impl_ops->event_attr_is_visible		= NULL;
+	impl_ops->get_identifier		= NULL;
+	impl_ops->is_cycle_counter_event	= NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h
new file mode 100644
index 0000000000..71e18f0dc5
--- /dev/null
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ */
+
+/* Support for NVIDIA specific attributes. */
+
+#ifndef __NVIDIA_CSPMU_H__
+#define __NVIDIA_CSPMU_H__
+
+#include "arm_cspmu.h"
+
+/* Allocate NVIDIA descriptor. */
+int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
+
+#endif /* __NVIDIA_CSPMU_H__ */
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
new file mode 100644
index 0000000000..30cea68595
--- /dev/null
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -0,0 +1,783 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ARM DMC-620 memory controller PMU driver
+ *
+ * Copyright (C) 2020 Ampere Computing LLC.
+ */
+
+#define DMC620_PMUNAME		"arm_dmc620"
+#define DMC620_DRVNAME		DMC620_PMUNAME "_pmu"
+#define pr_fmt(fmt)		DMC620_DRVNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/refcount.h>
+
+#define DMC620_PA_SHIFT					12
+#define DMC620_CNT_INIT					0x80000000
+#define DMC620_CNT_MAX_PERIOD				0xffffffff
+#define DMC620_PMU_CLKDIV2_MAX_COUNTERS			8
+#define DMC620_PMU_CLK_MAX_COUNTERS			2
+#define DMC620_PMU_MAX_COUNTERS				\
+	(DMC620_PMU_CLKDIV2_MAX_COUNTERS + DMC620_PMU_CLK_MAX_COUNTERS)
+
+/*
+ * The PMU registers start at 0xA00 in the DMC-620 memory map, and these
+ * offsets are relative to that base.
+ *
+ * Each counter has a group of control/value registers, and the
+ * DMC620_PMU_COUNTERn offsets are within a counter group.
+ *
+ * The counter registers groups start at 0xA10.
+ */
+#define DMC620_PMU_OVERFLOW_STATUS_CLKDIV2		0x8
+#define  DMC620_PMU_OVERFLOW_STATUS_CLKDIV2_MASK	\
+		(DMC620_PMU_CLKDIV2_MAX_COUNTERS - 1)
+#define DMC620_PMU_OVERFLOW_STATUS_CLK			0xC
+#define  DMC620_PMU_OVERFLOW_STATUS_CLK_MASK		\
+		(DMC620_PMU_CLK_MAX_COUNTERS - 1)
+#define DMC620_PMU_COUNTERS_BASE			0x10
+#define DMC620_PMU_COUNTERn_MASK_31_00			0x0
+#define DMC620_PMU_COUNTERn_MASK_63_32			0x4
+#define DMC620_PMU_COUNTERn_MATCH_31_00			0x8
+#define DMC620_PMU_COUNTERn_MATCH_63_32			0xC
+#define DMC620_PMU_COUNTERn_CONTROL			0x10
+#define  DMC620_PMU_COUNTERn_CONTROL_ENABLE		BIT(0)
+#define  DMC620_PMU_COUNTERn_CONTROL_INVERT		BIT(1)
+#define  DMC620_PMU_COUNTERn_CONTROL_EVENT_MUX		GENMASK(6, 2)
+#define  DMC620_PMU_COUNTERn_CONTROL_INCR_MUX		GENMASK(8, 7)
+#define DMC620_PMU_COUNTERn_VALUE			0x20
+/* Offset of the registers for a given counter, relative to 0xA00 */
+#define DMC620_PMU_COUNTERn_OFFSET(n) \
+	(DMC620_PMU_COUNTERS_BASE + 0x28 * (n))
+
+/*
+ * dmc620_pmu_irqs_lock: protects dmc620_pmu_irqs list
+ * dmc620_pmu_node_lock: protects pmus_node lists in all dmc620_pmu instances
+ */
+static DEFINE_MUTEX(dmc620_pmu_irqs_lock);
+static DEFINE_MUTEX(dmc620_pmu_node_lock);
+static LIST_HEAD(dmc620_pmu_irqs);
+
+struct dmc620_pmu_irq {
+	struct hlist_node node;
+	struct list_head pmus_node;
+	struct list_head irqs_node;
+	refcount_t refcount;
+	unsigned int irq_num;
+	unsigned int cpu;
+};
+
+struct dmc620_pmu {
+	struct pmu pmu;
+
+	void __iomem *base;
+	struct dmc620_pmu_irq *irq;
+	struct list_head pmus_node;
+
+	/*
+	 * We put all clkdiv2 and clk counters to a same array.
+	 * The first DMC620_PMU_CLKDIV2_MAX_COUNTERS bits belong to
+	 * clkdiv2 counters, the last DMC620_PMU_CLK_MAX_COUNTERS
+	 * belong to clk counters.
+	 */
+	DECLARE_BITMAP(used_mask, DMC620_PMU_MAX_COUNTERS);
+	struct perf_event *events[DMC620_PMU_MAX_COUNTERS];
+};
+
+#define to_dmc620_pmu(p) (container_of(p, struct dmc620_pmu, pmu))
+
+static int cpuhp_state_num;
+
+struct dmc620_pmu_event_attr {
+	struct device_attribute attr;
+	u8 clkdiv2;
+	u8 eventid;
+};
+
+static ssize_t
+dmc620_pmu_event_show(struct device *dev,
+			   struct device_attribute *attr, char *page)
+{
+	struct dmc620_pmu_event_attr *eattr;
+
+	eattr = container_of(attr, typeof(*eattr), attr);
+
+	return sysfs_emit(page, "event=0x%x,clkdiv2=0x%x\n", eattr->eventid, eattr->clkdiv2);
+}
+
+#define DMC620_PMU_EVENT_ATTR(_name, _eventid, _clkdiv2)		\
+	(&((struct dmc620_pmu_event_attr[]) {{				\
+		.attr = __ATTR(_name, 0444, dmc620_pmu_event_show, NULL),	\
+		.clkdiv2 = _clkdiv2,						\
+		.eventid = _eventid,					\
+	}})[0].attr.attr)
+
+static struct attribute *dmc620_pmu_events_attrs[] = {
+	/* clkdiv2 events list */
+	DMC620_PMU_EVENT_ATTR(clkdiv2_cycle_count, 0x0, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_allocate, 0x1, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_queue_depth, 0x2, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_waiting_for_wr_data, 0x3, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_read_backlog, 0x4, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_waiting_for_mi, 0x5, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_hazard_resolution, 0x6, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_enqueue, 0x7, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_arbitrate, 0x8, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_lrank_turnaround_activate, 0x9, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_prank_turnaround_activate, 0xa, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_read_depth, 0xb, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_write_depth, 0xc, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_highigh_qos_depth, 0xd, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_high_qos_depth, 0xe, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_medium_qos_depth, 0xf, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_low_qos_depth, 0x10, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_activate, 0x11, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_rdwr, 0x12, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_refresh, 0x13, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_training_request, 0x14, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_t_mac_tracker, 0x15, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_bk_fsm_tracker, 0x16, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_bk_open_tracker, 0x17, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_ranks_in_pwr_down, 0x18, 1),
+	DMC620_PMU_EVENT_ATTR(clkdiv2_ranks_in_sref, 0x19, 1),
+
+	/* clk events list */
+	DMC620_PMU_EVENT_ATTR(clk_cycle_count, 0x0, 0),
+	DMC620_PMU_EVENT_ATTR(clk_request, 0x1, 0),
+	DMC620_PMU_EVENT_ATTR(clk_upload_stall, 0x2, 0),
+	NULL,
+};
+
+static const struct attribute_group dmc620_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = dmc620_pmu_events_attrs,
+};
+
+/* User ABI */
+#define ATTR_CFG_FLD_mask_CFG		config
+#define ATTR_CFG_FLD_mask_LO		0
+#define ATTR_CFG_FLD_mask_HI		44
+#define ATTR_CFG_FLD_match_CFG		config1
+#define ATTR_CFG_FLD_match_LO		0
+#define ATTR_CFG_FLD_match_HI		44
+#define ATTR_CFG_FLD_invert_CFG		config2
+#define ATTR_CFG_FLD_invert_LO		0
+#define ATTR_CFG_FLD_invert_HI		0
+#define ATTR_CFG_FLD_incr_CFG		config2
+#define ATTR_CFG_FLD_incr_LO		1
+#define ATTR_CFG_FLD_incr_HI		2
+#define ATTR_CFG_FLD_event_CFG		config2
+#define ATTR_CFG_FLD_event_LO		3
+#define ATTR_CFG_FLD_event_HI		8
+#define ATTR_CFG_FLD_clkdiv2_CFG	config2
+#define ATTR_CFG_FLD_clkdiv2_LO		9
+#define ATTR_CFG_FLD_clkdiv2_HI		9
+
+#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)			\
+	(lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
+
+#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi)			\
+	__GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+
+#define GEN_PMU_FORMAT_ATTR(name)				\
+	PMU_FORMAT_ATTR(name,					\
+	_GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG,		\
+			     ATTR_CFG_FLD_##name##_LO,		\
+			     ATTR_CFG_FLD_##name##_HI))
+
+#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi)			\
+	((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0))
+
+#define ATTR_CFG_GET_FLD(attr, name)				\
+	_ATTR_CFG_GET_FLD(attr,					\
+			  ATTR_CFG_FLD_##name##_CFG,		\
+			  ATTR_CFG_FLD_##name##_LO,		\
+			  ATTR_CFG_FLD_##name##_HI)
+
+GEN_PMU_FORMAT_ATTR(mask);
+GEN_PMU_FORMAT_ATTR(match);
+GEN_PMU_FORMAT_ATTR(invert);
+GEN_PMU_FORMAT_ATTR(incr);
+GEN_PMU_FORMAT_ATTR(event);
+GEN_PMU_FORMAT_ATTR(clkdiv2);
+
+static struct attribute *dmc620_pmu_formats_attrs[] = {
+	&format_attr_mask.attr,
+	&format_attr_match.attr,
+	&format_attr_invert.attr,
+	&format_attr_incr.attr,
+	&format_attr_event.attr,
+	&format_attr_clkdiv2.attr,
+	NULL,
+};
+
+static const struct attribute_group dmc620_pmu_format_attr_group = {
+	.name	= "format",
+	.attrs	= dmc620_pmu_formats_attrs,
+};
+
+static ssize_t dmc620_pmu_cpumask_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf,
+				       cpumask_of(dmc620_pmu->irq->cpu));
+}
+
+static struct device_attribute dmc620_pmu_cpumask_attr =
+	__ATTR(cpumask, 0444, dmc620_pmu_cpumask_show, NULL);
+
+static struct attribute *dmc620_pmu_cpumask_attrs[] = {
+	&dmc620_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group dmc620_pmu_cpumask_attr_group = {
+	.attrs = dmc620_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *dmc620_pmu_attr_groups[] = {
+	&dmc620_pmu_events_attr_group,
+	&dmc620_pmu_format_attr_group,
+	&dmc620_pmu_cpumask_attr_group,
+	NULL,
+};
+
+static inline
+u32 dmc620_pmu_creg_read(struct dmc620_pmu *dmc620_pmu,
+			unsigned int idx, unsigned int reg)
+{
+	return readl(dmc620_pmu->base + DMC620_PMU_COUNTERn_OFFSET(idx) + reg);
+}
+
+static inline
+void dmc620_pmu_creg_write(struct dmc620_pmu *dmc620_pmu,
+			unsigned int idx, unsigned int reg, u32 val)
+{
+	writel(val, dmc620_pmu->base + DMC620_PMU_COUNTERn_OFFSET(idx) + reg);
+}
+
+static
+unsigned int dmc620_event_to_counter_control(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	unsigned int reg = 0;
+
+	reg |= FIELD_PREP(DMC620_PMU_COUNTERn_CONTROL_INVERT,
+			ATTR_CFG_GET_FLD(attr, invert));
+	reg |= FIELD_PREP(DMC620_PMU_COUNTERn_CONTROL_EVENT_MUX,
+			ATTR_CFG_GET_FLD(attr, event));
+	reg |= FIELD_PREP(DMC620_PMU_COUNTERn_CONTROL_INCR_MUX,
+			ATTR_CFG_GET_FLD(attr, incr));
+
+	return reg;
+}
+
+static int dmc620_get_event_idx(struct perf_event *event)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+	int idx, start_idx, end_idx;
+
+	if (ATTR_CFG_GET_FLD(&event->attr, clkdiv2)) {
+		start_idx = 0;
+		end_idx = DMC620_PMU_CLKDIV2_MAX_COUNTERS;
+	} else {
+		start_idx = DMC620_PMU_CLKDIV2_MAX_COUNTERS;
+		end_idx = DMC620_PMU_MAX_COUNTERS;
+	}
+
+	for (idx = start_idx; idx < end_idx; ++idx) {
+		if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
+			return idx;
+	}
+
+	/* The counters are all in use. */
+	return -EAGAIN;
+}
+
+static inline
+u64 dmc620_pmu_read_counter(struct perf_event *event)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+
+	return dmc620_pmu_creg_read(dmc620_pmu,
+				    event->hw.idx, DMC620_PMU_COUNTERn_VALUE);
+}
+
+static void dmc620_pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_count, new_count;
+
+	do {
+		/* We may also be called from the irq handler */
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = dmc620_pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count,
+			prev_count, new_count) != prev_count);
+	delta = (new_count - prev_count) & DMC620_CNT_MAX_PERIOD;
+	local64_add(delta, &event->count);
+}
+
+static void dmc620_pmu_event_set_period(struct perf_event *event)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+
+	local64_set(&event->hw.prev_count, DMC620_CNT_INIT);
+	dmc620_pmu_creg_write(dmc620_pmu,
+			      event->hw.idx, DMC620_PMU_COUNTERn_VALUE, DMC620_CNT_INIT);
+}
+
+static void dmc620_pmu_enable_counter(struct perf_event *event)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+	u32 reg;
+
+	reg = dmc620_event_to_counter_control(event) | DMC620_PMU_COUNTERn_CONTROL_ENABLE;
+	dmc620_pmu_creg_write(dmc620_pmu,
+			      event->hw.idx, DMC620_PMU_COUNTERn_CONTROL, reg);
+}
+
+static void dmc620_pmu_disable_counter(struct perf_event *event)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+
+	dmc620_pmu_creg_write(dmc620_pmu,
+			      event->hw.idx, DMC620_PMU_COUNTERn_CONTROL, 0);
+}
+
+static irqreturn_t dmc620_pmu_handle_irq(int irq_num, void *data)
+{
+	struct dmc620_pmu_irq *irq = data;
+	struct dmc620_pmu *dmc620_pmu;
+	irqreturn_t ret = IRQ_NONE;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(dmc620_pmu, &irq->pmus_node, pmus_node) {
+		unsigned long status;
+		struct perf_event *event;
+		unsigned int idx;
+
+		/*
+		 * HW doesn't provide a control to atomically disable all counters.
+		 * To prevent race condition (overflow happens while clearing status register),
+		 * disable all events before continuing
+		 */
+		for (idx = 0; idx < DMC620_PMU_MAX_COUNTERS; idx++) {
+			event = dmc620_pmu->events[idx];
+			if (!event)
+				continue;
+			dmc620_pmu_disable_counter(event);
+		}
+
+		status = readl(dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLKDIV2);
+		status |= (readl(dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLK) <<
+				DMC620_PMU_CLKDIV2_MAX_COUNTERS);
+		if (status) {
+			for_each_set_bit(idx, &status,
+					DMC620_PMU_MAX_COUNTERS) {
+				event = dmc620_pmu->events[idx];
+				if (WARN_ON_ONCE(!event))
+					continue;
+				dmc620_pmu_event_update(event);
+				dmc620_pmu_event_set_period(event);
+			}
+
+			if (status & DMC620_PMU_OVERFLOW_STATUS_CLKDIV2_MASK)
+				writel(0, dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLKDIV2);
+
+			if ((status >> DMC620_PMU_CLKDIV2_MAX_COUNTERS) &
+				DMC620_PMU_OVERFLOW_STATUS_CLK_MASK)
+				writel(0, dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLK);
+		}
+
+		for (idx = 0; idx < DMC620_PMU_MAX_COUNTERS; idx++) {
+			event = dmc620_pmu->events[idx];
+			if (!event)
+				continue;
+			if (!(event->hw.state & PERF_HES_STOPPED))
+				dmc620_pmu_enable_counter(event);
+		}
+
+		ret = IRQ_HANDLED;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static struct dmc620_pmu_irq *__dmc620_pmu_get_irq(int irq_num)
+{
+	struct dmc620_pmu_irq *irq;
+	int ret;
+
+	list_for_each_entry(irq, &dmc620_pmu_irqs, irqs_node)
+		if (irq->irq_num == irq_num && refcount_inc_not_zero(&irq->refcount))
+			return irq;
+
+	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+	if (!irq)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&irq->pmus_node);
+
+	/* Pick one CPU to be the preferred one to use */
+	irq->cpu = raw_smp_processor_id();
+	refcount_set(&irq->refcount, 1);
+
+	ret = request_irq(irq_num, dmc620_pmu_handle_irq,
+			  IRQF_NOBALANCING | IRQF_NO_THREAD,
+			  "dmc620-pmu", irq);
+	if (ret)
+		goto out_free_aff;
+
+	ret = irq_set_affinity(irq_num, cpumask_of(irq->cpu));
+	if (ret)
+		goto out_free_irq;
+
+	ret = cpuhp_state_add_instance_nocalls(cpuhp_state_num, &irq->node);
+	if (ret)
+		goto out_free_irq;
+
+	irq->irq_num = irq_num;
+	list_add(&irq->irqs_node, &dmc620_pmu_irqs);
+
+	return irq;
+
+out_free_irq:
+	free_irq(irq_num, irq);
+out_free_aff:
+	kfree(irq);
+	return ERR_PTR(ret);
+}
+
+static int dmc620_pmu_get_irq(struct dmc620_pmu *dmc620_pmu, int irq_num)
+{
+	struct dmc620_pmu_irq *irq;
+
+	mutex_lock(&dmc620_pmu_irqs_lock);
+	irq = __dmc620_pmu_get_irq(irq_num);
+	mutex_unlock(&dmc620_pmu_irqs_lock);
+
+	if (IS_ERR(irq))
+		return PTR_ERR(irq);
+
+	dmc620_pmu->irq = irq;
+	mutex_lock(&dmc620_pmu_node_lock);
+	list_add_rcu(&dmc620_pmu->pmus_node, &irq->pmus_node);
+	mutex_unlock(&dmc620_pmu_node_lock);
+
+	return 0;
+}
+
+static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu)
+{
+	struct dmc620_pmu_irq *irq = dmc620_pmu->irq;
+
+	mutex_lock(&dmc620_pmu_node_lock);
+	list_del_rcu(&dmc620_pmu->pmus_node);
+	mutex_unlock(&dmc620_pmu_node_lock);
+
+	mutex_lock(&dmc620_pmu_irqs_lock);
+	if (!refcount_dec_and_test(&irq->refcount)) {
+		mutex_unlock(&dmc620_pmu_irqs_lock);
+		return;
+	}
+
+	list_del(&irq->irqs_node);
+	mutex_unlock(&dmc620_pmu_irqs_lock);
+
+	free_irq(irq->irq_num, irq);
+	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &irq->node);
+	kfree(irq);
+}
+
+static int dmc620_pmu_event_init(struct perf_event *event)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event *sibling;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * DMC 620 PMUs are shared across all cpus and cannot
+	 * support task bound and sampling events.
+	 */
+	if (is_sampling_event(event) ||
+		event->attach_state & PERF_ATTACH_TASK) {
+		dev_dbg(dmc620_pmu->pmu.dev,
+			"Can't support per-task counters\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Many perf core operations (eg. events rotation) operate on a
+	 * single CPU context. This is obvious for CPU PMUs, where one
+	 * expects the same sets of events being observed on all CPUs,
+	 * but can lead to issues for off-core PMUs, where each
+	 * event could be theoretically assigned to a different CPU. To
+	 * mitigate this, we enforce CPU assignment to one, selected
+	 * processor.
+	 */
+	event->cpu = dmc620_pmu->irq->cpu;
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/*
+	 * We can't atomically disable all HW counters so only one event allowed,
+	 * although software events are acceptable.
+	 */
+	if (event->group_leader != event &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling != event &&
+				!is_software_event(sibling))
+			return -EINVAL;
+	}
+
+	hwc->idx = -1;
+	return 0;
+}
+
+static void dmc620_pmu_read(struct perf_event *event)
+{
+	dmc620_pmu_event_update(event);
+}
+
+static void dmc620_pmu_start(struct perf_event *event, int flags)
+{
+	event->hw.state = 0;
+	dmc620_pmu_event_set_period(event);
+	dmc620_pmu_enable_counter(event);
+}
+
+static void dmc620_pmu_stop(struct perf_event *event, int flags)
+{
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	dmc620_pmu_disable_counter(event);
+	dmc620_pmu_event_update(event);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int dmc620_pmu_add(struct perf_event *event, int flags)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	u64 reg;
+
+	idx = dmc620_get_event_idx(event);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	dmc620_pmu->events[idx] = event;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	reg = ATTR_CFG_GET_FLD(attr, mask);
+	dmc620_pmu_creg_write(dmc620_pmu,
+			      idx, DMC620_PMU_COUNTERn_MASK_31_00, lower_32_bits(reg));
+	dmc620_pmu_creg_write(dmc620_pmu,
+			      idx, DMC620_PMU_COUNTERn_MASK_63_32, upper_32_bits(reg));
+
+	reg = ATTR_CFG_GET_FLD(attr, match);
+	dmc620_pmu_creg_write(dmc620_pmu,
+			      idx, DMC620_PMU_COUNTERn_MATCH_31_00, lower_32_bits(reg));
+	dmc620_pmu_creg_write(dmc620_pmu,
+			      idx, DMC620_PMU_COUNTERn_MATCH_63_32, upper_32_bits(reg));
+
+	if (flags & PERF_EF_START)
+		dmc620_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+	return 0;
+}
+
+static void dmc620_pmu_del(struct perf_event *event, int flags)
+{
+	struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	dmc620_pmu_stop(event, PERF_EF_UPDATE);
+	dmc620_pmu->events[idx] = NULL;
+	clear_bit(idx, dmc620_pmu->used_mask);
+	perf_event_update_userpage(event);
+}
+
+static int dmc620_pmu_cpu_teardown(unsigned int cpu,
+				   struct hlist_node *node)
+{
+	struct dmc620_pmu_irq *irq;
+	struct dmc620_pmu *dmc620_pmu;
+	unsigned int target;
+
+	irq = hlist_entry_safe(node, struct dmc620_pmu_irq, node);
+	if (cpu != irq->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	/* We're only reading, but this isn't the place to be involving RCU */
+	mutex_lock(&dmc620_pmu_node_lock);
+	list_for_each_entry(dmc620_pmu, &irq->pmus_node, pmus_node)
+		perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
+	mutex_unlock(&dmc620_pmu_node_lock);
+
+	WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
+	irq->cpu = target;
+
+	return 0;
+}
+
+static int dmc620_pmu_device_probe(struct platform_device *pdev)
+{
+	struct dmc620_pmu *dmc620_pmu;
+	struct resource *res;
+	char *name;
+	int irq_num;
+	int i, ret;
+
+	dmc620_pmu = devm_kzalloc(&pdev->dev,
+			sizeof(struct dmc620_pmu), GFP_KERNEL);
+	if (!dmc620_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, dmc620_pmu);
+
+	dmc620_pmu->pmu = (struct pmu) {
+		.module = THIS_MODULE,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= dmc620_pmu_event_init,
+		.add		= dmc620_pmu_add,
+		.del		= dmc620_pmu_del,
+		.start		= dmc620_pmu_start,
+		.stop		= dmc620_pmu_stop,
+		.read		= dmc620_pmu_read,
+		.attr_groups	= dmc620_pmu_attr_groups,
+	};
+
+	dmc620_pmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(dmc620_pmu->base))
+		return PTR_ERR(dmc620_pmu->base);
+
+	/* Make sure device is reset before enabling interrupt */
+	for (i = 0; i < DMC620_PMU_MAX_COUNTERS; i++)
+		dmc620_pmu_creg_write(dmc620_pmu, i, DMC620_PMU_COUNTERn_CONTROL, 0);
+	writel(0, dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLKDIV2);
+	writel(0, dmc620_pmu->base + DMC620_PMU_OVERFLOW_STATUS_CLK);
+
+	irq_num = platform_get_irq(pdev, 0);
+	if (irq_num < 0)
+		return irq_num;
+
+	ret = dmc620_pmu_get_irq(dmc620_pmu, irq_num);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+				  "%s_%llx", DMC620_PMUNAME,
+				  (u64)(res->start >> DMC620_PA_SHIFT));
+	if (!name) {
+		dev_err(&pdev->dev,
+			  "Create name failed, PMU @%pa\n", &res->start);
+		ret = -ENOMEM;
+		goto out_teardown_dev;
+	}
+
+	ret = perf_pmu_register(&dmc620_pmu->pmu, name, -1);
+	if (ret)
+		goto out_teardown_dev;
+
+	return 0;
+
+out_teardown_dev:
+	dmc620_pmu_put_irq(dmc620_pmu);
+	synchronize_rcu();
+	return ret;
+}
+
+static int dmc620_pmu_device_remove(struct platform_device *pdev)
+{
+	struct dmc620_pmu *dmc620_pmu = platform_get_drvdata(pdev);
+
+	dmc620_pmu_put_irq(dmc620_pmu);
+
+	/* perf will synchronise RCU before devres can free dmc620_pmu */
+	perf_pmu_unregister(&dmc620_pmu->pmu);
+
+	return 0;
+}
+
+static const struct acpi_device_id dmc620_acpi_match[] = {
+	{ "ARMHD620", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, dmc620_acpi_match);
+static struct platform_driver dmc620_pmu_driver = {
+	.driver	= {
+		.name		= DMC620_DRVNAME,
+		.acpi_match_table = dmc620_acpi_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe	= dmc620_pmu_device_probe,
+	.remove	= dmc620_pmu_device_remove,
+};
+
+static int __init dmc620_pmu_init(void)
+{
+	int ret;
+
+	cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      DMC620_DRVNAME,
+				      NULL,
+				      dmc620_pmu_cpu_teardown);
+	if (cpuhp_state_num < 0)
+		return cpuhp_state_num;
+
+	ret = platform_driver_register(&dmc620_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(cpuhp_state_num);
+
+	return ret;
+}
+
+static void __exit dmc620_pmu_exit(void)
+{
+	platform_driver_unregister(&dmc620_pmu_driver);
+	cpuhp_remove_multi_state(cpuhp_state_num);
+}
+
+module_init(dmc620_pmu_init);
+module_exit(dmc620_pmu_exit);
+
+MODULE_DESCRIPTION("Perf driver for the ARM DMC-620 memory controller");
+MODULE_AUTHOR("Tuan Phan <tuanphan@os.amperecomputing.com");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
new file mode 100644
index 0000000000..8223c49bd0
--- /dev/null
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -0,0 +1,879 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ARM DynamIQ Shared Unit (DSU) PMU driver
+ *
+ * Copyright (C) ARM Limited, 2017.
+ *
+ * Based on ARM CCI-PMU, ARMv8 PMU-v3 drivers.
+ */
+
+#define PMUNAME		"arm_dsu"
+#define DRVNAME		PMUNAME "_pmu"
+#define pr_fmt(fmt)	DRVNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include <asm/arm_dsu_pmu.h>
+#include <asm/local64.h>
+
+/* PMU event codes */
+#define DSU_PMU_EVT_CYCLES		0x11
+#define DSU_PMU_EVT_CHAIN		0x1e
+
+#define DSU_PMU_MAX_COMMON_EVENTS	0x40
+
+#define DSU_PMU_MAX_HW_CNTRS		32
+#define DSU_PMU_HW_COUNTER_MASK		(DSU_PMU_MAX_HW_CNTRS - 1)
+
+#define CLUSTERPMCR_E			BIT(0)
+#define CLUSTERPMCR_P			BIT(1)
+#define CLUSTERPMCR_C			BIT(2)
+#define CLUSTERPMCR_N_SHIFT		11
+#define CLUSTERPMCR_N_MASK		0x1f
+#define CLUSTERPMCR_IDCODE_SHIFT	16
+#define CLUSTERPMCR_IDCODE_MASK		0xff
+#define CLUSTERPMCR_IMP_SHIFT		24
+#define CLUSTERPMCR_IMP_MASK		0xff
+#define CLUSTERPMCR_RES_MASK		0x7e8
+#define CLUSTERPMCR_RES_VAL		0x40
+
+#define DSU_ACTIVE_CPU_MASK		0x0
+#define DSU_ASSOCIATED_CPU_MASK		0x1
+
+/*
+ * We use the index of the counters as they appear in the counter
+ * bit maps in the PMU registers (e.g CLUSTERPMSELR).
+ * i.e,
+ *	counter 0	- Bit 0
+ *	counter 1	- Bit 1
+ *	...
+ *	Cycle counter	- Bit 31
+ */
+#define DSU_PMU_IDX_CYCLE_COUNTER	31
+
+/* All event counters are 32bit, with a 64bit Cycle counter */
+#define DSU_PMU_COUNTER_WIDTH(idx)	\
+	(((idx) == DSU_PMU_IDX_CYCLE_COUNTER) ? 64 : 32)
+
+#define DSU_PMU_COUNTER_MASK(idx)	\
+	GENMASK_ULL((DSU_PMU_COUNTER_WIDTH((idx)) - 1), 0)
+
+#define DSU_EXT_ATTR(_name, _func, _config)		\
+	(&((struct dev_ext_attribute[]) {				\
+		{							\
+			.attr = __ATTR(_name, 0444, _func, NULL),	\
+			.var = (void *)_config				\
+		}							\
+	})[0].attr.attr)
+
+#define DSU_EVENT_ATTR(_name, _config)		\
+	DSU_EXT_ATTR(_name, dsu_pmu_sysfs_event_show, (unsigned long)_config)
+
+#define DSU_FORMAT_ATTR(_name, _config)		\
+	DSU_EXT_ATTR(_name, dsu_pmu_sysfs_format_show, (char *)_config)
+
+#define DSU_CPUMASK_ATTR(_name, _config)	\
+	DSU_EXT_ATTR(_name, dsu_pmu_cpumask_show, (unsigned long)_config)
+
+struct dsu_hw_events {
+	DECLARE_BITMAP(used_mask, DSU_PMU_MAX_HW_CNTRS);
+	struct perf_event	*events[DSU_PMU_MAX_HW_CNTRS];
+};
+
+/*
+ * struct dsu_pmu	- DSU PMU descriptor
+ *
+ * @pmu_lock		: Protects accesses to DSU PMU register from normal vs
+ *			  interrupt handler contexts.
+ * @hw_events		: Holds the event counter state.
+ * @associated_cpus	: CPUs attached to the DSU.
+ * @active_cpu		: CPU to which the PMU is bound for accesses.
+ * @cpuhp_node		: Node for CPU hotplug notifier link.
+ * @num_counters	: Number of event counters implemented by the PMU,
+ *			  excluding the cycle counter.
+ * @irq			: Interrupt line for counter overflow.
+ * @cpmceid_bitmap	: Bitmap for the availability of architected common
+ *			  events (event_code < 0x40).
+ */
+struct dsu_pmu {
+	struct pmu			pmu;
+	struct device			*dev;
+	raw_spinlock_t			pmu_lock;
+	struct dsu_hw_events		hw_events;
+	cpumask_t			associated_cpus;
+	cpumask_t			active_cpu;
+	struct hlist_node		cpuhp_node;
+	s8				num_counters;
+	int				irq;
+	DECLARE_BITMAP(cpmceid_bitmap, DSU_PMU_MAX_COMMON_EVENTS);
+};
+
+static unsigned long dsu_pmu_cpuhp_state;
+
+static inline struct dsu_pmu *to_dsu_pmu(struct pmu *pmu)
+{
+	return container_of(pmu, struct dsu_pmu, pmu);
+}
+
+static ssize_t dsu_pmu_sysfs_event_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+					struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "event=0x%lx\n", (unsigned long)eattr->var);
+}
+
+static ssize_t dsu_pmu_sysfs_format_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+					struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t dsu_pmu_cpumask_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
+	struct dev_ext_attribute *eattr = container_of(attr,
+					struct dev_ext_attribute, attr);
+	unsigned long mask_id = (unsigned long)eattr->var;
+	const cpumask_t *cpumask;
+
+	switch (mask_id) {
+	case DSU_ACTIVE_CPU_MASK:
+		cpumask = &dsu_pmu->active_cpu;
+		break;
+	case DSU_ASSOCIATED_CPU_MASK:
+		cpumask = &dsu_pmu->associated_cpus;
+		break;
+	default:
+		return 0;
+	}
+	return cpumap_print_to_pagebuf(true, buf, cpumask);
+}
+
+static struct attribute *dsu_pmu_format_attrs[] = {
+	DSU_FORMAT_ATTR(event, "config:0-31"),
+	NULL,
+};
+
+static const struct attribute_group dsu_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = dsu_pmu_format_attrs,
+};
+
+static struct attribute *dsu_pmu_event_attrs[] = {
+	DSU_EVENT_ATTR(cycles, 0x11),
+	DSU_EVENT_ATTR(bus_access, 0x19),
+	DSU_EVENT_ATTR(memory_error, 0x1a),
+	DSU_EVENT_ATTR(bus_cycles, 0x1d),
+	DSU_EVENT_ATTR(l3d_cache_allocate, 0x29),
+	DSU_EVENT_ATTR(l3d_cache_refill, 0x2a),
+	DSU_EVENT_ATTR(l3d_cache, 0x2b),
+	DSU_EVENT_ATTR(l3d_cache_wb, 0x2c),
+	NULL,
+};
+
+static umode_t
+dsu_pmu_event_attr_is_visible(struct kobject *kobj, struct attribute *attr,
+				int unused)
+{
+	struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
+	struct dev_ext_attribute *eattr = container_of(attr,
+					struct dev_ext_attribute, attr.attr);
+	unsigned long evt = (unsigned long)eattr->var;
+
+	return test_bit(evt, dsu_pmu->cpmceid_bitmap) ? attr->mode : 0;
+}
+
+static const struct attribute_group dsu_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = dsu_pmu_event_attrs,
+	.is_visible = dsu_pmu_event_attr_is_visible,
+};
+
+static struct attribute *dsu_pmu_cpumask_attrs[] = {
+	DSU_CPUMASK_ATTR(cpumask, DSU_ACTIVE_CPU_MASK),
+	DSU_CPUMASK_ATTR(associated_cpus, DSU_ASSOCIATED_CPU_MASK),
+	NULL,
+};
+
+static const struct attribute_group dsu_pmu_cpumask_attr_group = {
+	.attrs = dsu_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *dsu_pmu_attr_groups[] = {
+	&dsu_pmu_cpumask_attr_group,
+	&dsu_pmu_events_attr_group,
+	&dsu_pmu_format_attr_group,
+	NULL,
+};
+
+static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu)
+{
+	struct cpumask online_supported;
+
+	cpumask_and(&online_supported,
+			 &dsu_pmu->associated_cpus, cpu_online_mask);
+	return cpumask_any_but(&online_supported, cpu);
+}
+
+static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx)
+{
+	return (idx < dsu_pmu->num_counters) ||
+	       (idx == DSU_PMU_IDX_CYCLE_COUNTER);
+}
+
+static inline u64 dsu_pmu_read_counter(struct perf_event *event)
+{
+	u64 val;
+	unsigned long flags;
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+	int idx = event->hw.idx;
+
+	if (WARN_ON(!cpumask_test_cpu(smp_processor_id(),
+				 &dsu_pmu->associated_cpus)))
+		return 0;
+
+	if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
+		dev_err(event->pmu->dev,
+			"Trying reading invalid counter %d\n", idx);
+		return 0;
+	}
+
+	raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+	if (idx == DSU_PMU_IDX_CYCLE_COUNTER)
+		val = __dsu_pmu_read_pmccntr();
+	else
+		val = __dsu_pmu_read_counter(idx);
+	raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+
+	return val;
+}
+
+static void dsu_pmu_write_counter(struct perf_event *event, u64 val)
+{
+	unsigned long flags;
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+	int idx = event->hw.idx;
+
+	if (WARN_ON(!cpumask_test_cpu(smp_processor_id(),
+			 &dsu_pmu->associated_cpus)))
+		return;
+
+	if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
+		dev_err(event->pmu->dev,
+			"writing to invalid counter %d\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+	if (idx == DSU_PMU_IDX_CYCLE_COUNTER)
+		__dsu_pmu_write_pmccntr(val);
+	else
+		__dsu_pmu_write_counter(idx, val);
+	raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+}
+
+static int dsu_pmu_get_event_idx(struct dsu_hw_events *hw_events,
+				 struct perf_event *event)
+{
+	int idx;
+	unsigned long evtype = event->attr.config;
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+	unsigned long *used_mask = hw_events->used_mask;
+
+	if (evtype == DSU_PMU_EVT_CYCLES) {
+		if (test_and_set_bit(DSU_PMU_IDX_CYCLE_COUNTER, used_mask))
+			return -EAGAIN;
+		return DSU_PMU_IDX_CYCLE_COUNTER;
+	}
+
+	idx = find_first_zero_bit(used_mask, dsu_pmu->num_counters);
+	if (idx >= dsu_pmu->num_counters)
+		return -EAGAIN;
+	set_bit(idx, hw_events->used_mask);
+	return idx;
+}
+
+static void dsu_pmu_enable_counter(struct dsu_pmu *dsu_pmu, int idx)
+{
+	__dsu_pmu_counter_interrupt_enable(idx);
+	__dsu_pmu_enable_counter(idx);
+}
+
+static void dsu_pmu_disable_counter(struct dsu_pmu *dsu_pmu, int idx)
+{
+	__dsu_pmu_disable_counter(idx);
+	__dsu_pmu_counter_interrupt_disable(idx);
+}
+
+static inline void dsu_pmu_set_event(struct dsu_pmu *dsu_pmu,
+					struct perf_event *event)
+{
+	int idx = event->hw.idx;
+	unsigned long flags;
+
+	if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
+		dev_err(event->pmu->dev,
+			"Trying to set invalid counter %d\n", idx);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+	__dsu_pmu_set_event(idx, event->hw.config_base);
+	raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+}
+
+static void dsu_pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_count, new_count;
+
+	do {
+		/* We may also be called from the irq handler */
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = dsu_pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_count, new_count) !=
+			prev_count);
+	delta = (new_count - prev_count) & DSU_PMU_COUNTER_MASK(hwc->idx);
+	local64_add(delta, &event->count);
+}
+
+static void dsu_pmu_read(struct perf_event *event)
+{
+	dsu_pmu_event_update(event);
+}
+
+static inline u32 dsu_pmu_get_reset_overflow(void)
+{
+	return __dsu_pmu_get_reset_overflow();
+}
+
+/**
+ * dsu_pmu_set_event_period: Set the period for the counter.
+ *
+ * All DSU PMU event counters, except the cycle counter are 32bit
+ * counters. To handle cases of extreme interrupt latency, we program
+ * the counter with half of the max count for the counters.
+ */
+static void dsu_pmu_set_event_period(struct perf_event *event)
+{
+	int idx = event->hw.idx;
+	u64 val = DSU_PMU_COUNTER_MASK(idx) >> 1;
+
+	local64_set(&event->hw.prev_count, val);
+	dsu_pmu_write_counter(event, val);
+}
+
+static irqreturn_t dsu_pmu_handle_irq(int irq_num, void *dev)
+{
+	int i;
+	bool handled = false;
+	struct dsu_pmu *dsu_pmu = dev;
+	struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
+	unsigned long overflow;
+
+	overflow = dsu_pmu_get_reset_overflow();
+	if (!overflow)
+		return IRQ_NONE;
+
+	for_each_set_bit(i, &overflow, DSU_PMU_MAX_HW_CNTRS) {
+		struct perf_event *event = hw_events->events[i];
+
+		if (!event)
+			continue;
+		dsu_pmu_event_update(event);
+		dsu_pmu_set_event_period(event);
+		handled = true;
+	}
+
+	return IRQ_RETVAL(handled);
+}
+
+static void dsu_pmu_start(struct perf_event *event, int pmu_flags)
+{
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+
+	/* We always reprogram the counter */
+	if (pmu_flags & PERF_EF_RELOAD)
+		WARN_ON(!(event->hw.state & PERF_HES_UPTODATE));
+	dsu_pmu_set_event_period(event);
+	if (event->hw.idx != DSU_PMU_IDX_CYCLE_COUNTER)
+		dsu_pmu_set_event(dsu_pmu, event);
+	event->hw.state = 0;
+	dsu_pmu_enable_counter(dsu_pmu, event->hw.idx);
+}
+
+static void dsu_pmu_stop(struct perf_event *event, int pmu_flags)
+{
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+	dsu_pmu_disable_counter(dsu_pmu, event->hw.idx);
+	dsu_pmu_event_update(event);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int dsu_pmu_add(struct perf_event *event, int flags)
+{
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+	struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
+					   &dsu_pmu->associated_cpus)))
+		return -ENOENT;
+
+	idx = dsu_pmu_get_event_idx(hw_events, event);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	hw_events->events[idx] = event;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		dsu_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+	return 0;
+}
+
+static void dsu_pmu_del(struct perf_event *event, int flags)
+{
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+	struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	dsu_pmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
+	perf_event_update_userpage(event);
+}
+
+static void dsu_pmu_enable(struct pmu *pmu)
+{
+	u32 pmcr;
+	unsigned long flags;
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
+
+	/* If no counters are added, skip enabling the PMU */
+	if (bitmap_empty(dsu_pmu->hw_events.used_mask, DSU_PMU_MAX_HW_CNTRS))
+		return;
+
+	raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+	pmcr = __dsu_pmu_read_pmcr();
+	pmcr |= CLUSTERPMCR_E;
+	__dsu_pmu_write_pmcr(pmcr);
+	raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+}
+
+static void dsu_pmu_disable(struct pmu *pmu)
+{
+	u32 pmcr;
+	unsigned long flags;
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
+
+	raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+	pmcr = __dsu_pmu_read_pmcr();
+	pmcr &= ~CLUSTERPMCR_E;
+	__dsu_pmu_write_pmcr(pmcr);
+	raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+}
+
+static bool dsu_pmu_validate_event(struct pmu *pmu,
+				  struct dsu_hw_events *hw_events,
+				  struct perf_event *event)
+{
+	if (is_software_event(event))
+		return true;
+	/* Reject groups spanning multiple HW PMUs. */
+	if (event->pmu != pmu)
+		return false;
+	return dsu_pmu_get_event_idx(hw_events, event) >= 0;
+}
+
+/*
+ * Make sure the group of events can be scheduled at once
+ * on the PMU.
+ */
+static bool dsu_pmu_validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct dsu_hw_events fake_hw;
+
+	if (event->group_leader == event)
+		return true;
+
+	memset(fake_hw.used_mask, 0, sizeof(fake_hw.used_mask));
+	if (!dsu_pmu_validate_event(event->pmu, &fake_hw, leader))
+		return false;
+	for_each_sibling_event(sibling, leader) {
+		if (!dsu_pmu_validate_event(event->pmu, &fake_hw, sibling))
+			return false;
+	}
+	return dsu_pmu_validate_event(event->pmu, &fake_hw, event);
+}
+
+static int dsu_pmu_event_init(struct perf_event *event)
+{
+	struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* We don't support sampling */
+	if (is_sampling_event(event)) {
+		dev_dbg(dsu_pmu->pmu.dev, "Can't support sampling events\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* We cannot support task bound events */
+	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
+		dev_dbg(dsu_pmu->pmu.dev, "Can't support per-task counters\n");
+		return -EINVAL;
+	}
+
+	if (has_branch_stack(event)) {
+		dev_dbg(dsu_pmu->pmu.dev, "Can't support filtering\n");
+		return -EINVAL;
+	}
+
+	if (!cpumask_test_cpu(event->cpu, &dsu_pmu->associated_cpus)) {
+		dev_dbg(dsu_pmu->pmu.dev,
+			 "Requested cpu is not associated with the DSU\n");
+		return -EINVAL;
+	}
+	/*
+	 * Choose the current active CPU to read the events. We don't want
+	 * to migrate the event contexts, irq handling etc to the requested
+	 * CPU. As long as the requested CPU is within the same DSU, we
+	 * are fine.
+	 */
+	event->cpu = cpumask_first(&dsu_pmu->active_cpu);
+	if (event->cpu >= nr_cpu_ids)
+		return -EINVAL;
+	if (!dsu_pmu_validate_group(event))
+		return -EINVAL;
+
+	event->hw.config_base = event->attr.config;
+	return 0;
+}
+
+static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev)
+{
+	struct dsu_pmu *dsu_pmu;
+
+	dsu_pmu = devm_kzalloc(&pdev->dev, sizeof(*dsu_pmu), GFP_KERNEL);
+	if (!dsu_pmu)
+		return ERR_PTR(-ENOMEM);
+
+	raw_spin_lock_init(&dsu_pmu->pmu_lock);
+	/*
+	 * Initialise the number of counters to -1, until we probe
+	 * the real number on a connected CPU.
+	 */
+	dsu_pmu->num_counters = -1;
+	return dsu_pmu;
+}
+
+/**
+ * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster
+ * from device tree.
+ */
+static int dsu_pmu_dt_get_cpus(struct device *dev, cpumask_t *mask)
+{
+	int i = 0, n, cpu;
+	struct device_node *cpu_node;
+
+	n = of_count_phandle_with_args(dev->of_node, "cpus", NULL);
+	if (n <= 0)
+		return -ENODEV;
+	for (; i < n; i++) {
+		cpu_node = of_parse_phandle(dev->of_node, "cpus", i);
+		if (!cpu_node)
+			break;
+		cpu = of_cpu_node_to_id(cpu_node);
+		of_node_put(cpu_node);
+		/*
+		 * We have to ignore the failures here and continue scanning
+		 * the list to handle cases where the nr_cpus could be capped
+		 * in the running kernel.
+		 */
+		if (cpu < 0)
+			continue;
+		cpumask_set_cpu(cpu, mask);
+	}
+	return 0;
+}
+
+/**
+ * dsu_pmu_acpi_get_cpus: Get the list of CPUs in the cluster
+ * from ACPI.
+ */
+static int dsu_pmu_acpi_get_cpus(struct device *dev, cpumask_t *mask)
+{
+#ifdef CONFIG_ACPI
+	struct acpi_device *parent_adev = acpi_dev_parent(ACPI_COMPANION(dev));
+	int cpu;
+
+	/*
+	 * A dsu pmu node is inside a cluster parent node along with cpu nodes.
+	 * We need to find out all cpus that have the same parent with this pmu.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct acpi_device *acpi_dev;
+		struct device *cpu_dev = get_cpu_device(cpu);
+
+		if (!cpu_dev)
+			continue;
+
+		acpi_dev = ACPI_COMPANION(cpu_dev);
+		if (acpi_dev && acpi_dev_parent(acpi_dev) == parent_adev)
+			cpumask_set_cpu(cpu, mask);
+	}
+#endif
+
+	return 0;
+}
+
+/*
+ * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster.
+ */
+static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu)
+{
+	u64 num_counters;
+	u32 cpmceid[2];
+
+	num_counters = (__dsu_pmu_read_pmcr() >> CLUSTERPMCR_N_SHIFT) &
+						CLUSTERPMCR_N_MASK;
+	/* We can only support up to 31 independent counters */
+	if (WARN_ON(num_counters > 31))
+		num_counters = 31;
+	dsu_pmu->num_counters = num_counters;
+	if (!dsu_pmu->num_counters)
+		return;
+	cpmceid[0] = __dsu_pmu_read_pmceid(0);
+	cpmceid[1] = __dsu_pmu_read_pmceid(1);
+	bitmap_from_arr32(dsu_pmu->cpmceid_bitmap, cpmceid,
+			  DSU_PMU_MAX_COMMON_EVENTS);
+}
+
+static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu)
+{
+	cpumask_set_cpu(cpu, &dsu_pmu->active_cpu);
+	if (irq_set_affinity(dsu_pmu->irq, &dsu_pmu->active_cpu))
+		pr_warn("Failed to set irq affinity to %d\n", cpu);
+}
+
+/*
+ * dsu_pmu_init_pmu: Initialise the DSU PMU configurations if
+ * we haven't done it already.
+ */
+static void dsu_pmu_init_pmu(struct dsu_pmu *dsu_pmu)
+{
+	if (dsu_pmu->num_counters == -1)
+		dsu_pmu_probe_pmu(dsu_pmu);
+	/* Reset the interrupt overflow mask */
+	dsu_pmu_get_reset_overflow();
+}
+
+static int dsu_pmu_device_probe(struct platform_device *pdev)
+{
+	int irq, rc;
+	struct dsu_pmu *dsu_pmu;
+	struct fwnode_handle *fwnode = dev_fwnode(&pdev->dev);
+	char *name;
+	static atomic_t pmu_idx = ATOMIC_INIT(-1);
+
+	dsu_pmu = dsu_pmu_alloc(pdev);
+	if (IS_ERR(dsu_pmu))
+		return PTR_ERR(dsu_pmu);
+
+	if (is_of_node(fwnode))
+		rc = dsu_pmu_dt_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus);
+	else if (is_acpi_device_node(fwnode))
+		rc = dsu_pmu_acpi_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus);
+	else
+		return -ENOENT;
+
+	if (rc) {
+		dev_warn(&pdev->dev, "Failed to parse the CPUs\n");
+		return rc;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -EINVAL;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_%d",
+				PMUNAME, atomic_inc_return(&pmu_idx));
+	if (!name)
+		return -ENOMEM;
+	rc = devm_request_irq(&pdev->dev, irq, dsu_pmu_handle_irq,
+			      IRQF_NOBALANCING, name, dsu_pmu);
+	if (rc) {
+		dev_warn(&pdev->dev, "Failed to request IRQ %d\n", irq);
+		return rc;
+	}
+
+	dsu_pmu->irq = irq;
+	platform_set_drvdata(pdev, dsu_pmu);
+	rc = cpuhp_state_add_instance(dsu_pmu_cpuhp_state,
+						&dsu_pmu->cpuhp_node);
+	if (rc)
+		return rc;
+
+	dsu_pmu->pmu = (struct pmu) {
+		.task_ctx_nr	= perf_invalid_context,
+		.module		= THIS_MODULE,
+		.pmu_enable	= dsu_pmu_enable,
+		.pmu_disable	= dsu_pmu_disable,
+		.event_init	= dsu_pmu_event_init,
+		.add		= dsu_pmu_add,
+		.del		= dsu_pmu_del,
+		.start		= dsu_pmu_start,
+		.stop		= dsu_pmu_stop,
+		.read		= dsu_pmu_read,
+
+		.attr_groups	= dsu_pmu_attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	rc = perf_pmu_register(&dsu_pmu->pmu, name, -1);
+	if (rc) {
+		cpuhp_state_remove_instance(dsu_pmu_cpuhp_state,
+						 &dsu_pmu->cpuhp_node);
+	}
+
+	return rc;
+}
+
+static int dsu_pmu_device_remove(struct platform_device *pdev)
+{
+	struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&dsu_pmu->pmu);
+	cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node);
+
+	return 0;
+}
+
+static const struct of_device_id dsu_pmu_of_match[] = {
+	{ .compatible = "arm,dsu-pmu", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, dsu_pmu_of_match);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id dsu_pmu_acpi_match[] = {
+	{ "ARMHD500", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, dsu_pmu_acpi_match);
+#endif
+
+static struct platform_driver dsu_pmu_driver = {
+	.driver = {
+		.name	= DRVNAME,
+		.of_match_table = of_match_ptr(dsu_pmu_of_match),
+		.acpi_match_table = ACPI_PTR(dsu_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = dsu_pmu_device_probe,
+	.remove = dsu_pmu_device_remove,
+};
+
+static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
+						   cpuhp_node);
+
+	if (!cpumask_test_cpu(cpu, &dsu_pmu->associated_cpus))
+		return 0;
+
+	/* If the PMU is already managed, there is nothing to do */
+	if (!cpumask_empty(&dsu_pmu->active_cpu))
+		return 0;
+
+	dsu_pmu_init_pmu(dsu_pmu);
+	dsu_pmu_set_active_cpu(cpu, dsu_pmu);
+
+	return 0;
+}
+
+static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+	int dst;
+	struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
+						   cpuhp_node);
+
+	if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu))
+		return 0;
+
+	dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
+	/* If there are no active CPUs in the DSU, leave IRQ disabled */
+	if (dst >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&dsu_pmu->pmu, cpu, dst);
+	dsu_pmu_set_active_cpu(dst, dsu_pmu);
+
+	return 0;
+}
+
+static int __init dsu_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+					DRVNAME,
+					dsu_pmu_cpu_online,
+					dsu_pmu_cpu_teardown);
+	if (ret < 0)
+		return ret;
+	dsu_pmu_cpuhp_state = ret;
+	ret = platform_driver_register(&dsu_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
+
+	return ret;
+}
+
+static void __exit dsu_pmu_exit(void)
+{
+	platform_driver_unregister(&dsu_pmu_driver);
+	cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
+}
+
+module_init(dsu_pmu_init);
+module_exit(dsu_pmu_exit);
+
+MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit");
+MODULE_AUTHOR("Suzuki K Poulose <suzuki.poulose@arm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
new file mode 100644
index 0000000000..d712a19e47
--- /dev/null
+++ b/drivers/perf/arm_pmu.c
@@ -0,0 +1,955 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#undef DEBUG
+
+/*
+ * ARM performance counter support.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based on the sparc64 perf event code, which is in turn based
+ * on the x86 code.
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include <linux/bitmap.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/slab.h>
+#include <linux/sched/clock.h>
+#include <linux/spinlock.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+
+#include <asm/irq_regs.h>
+
+static int armpmu_count_irq_users(const int irq);
+
+struct pmu_irq_ops {
+	void (*enable_pmuirq)(unsigned int irq);
+	void (*disable_pmuirq)(unsigned int irq);
+	void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid);
+};
+
+static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid)
+{
+	free_irq(irq, per_cpu_ptr(devid, cpu));
+}
+
+static const struct pmu_irq_ops pmuirq_ops = {
+	.enable_pmuirq = enable_irq,
+	.disable_pmuirq = disable_irq_nosync,
+	.free_pmuirq = armpmu_free_pmuirq
+};
+
+static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid)
+{
+	free_nmi(irq, per_cpu_ptr(devid, cpu));
+}
+
+static const struct pmu_irq_ops pmunmi_ops = {
+	.enable_pmuirq = enable_nmi,
+	.disable_pmuirq = disable_nmi_nosync,
+	.free_pmuirq = armpmu_free_pmunmi
+};
+
+static void armpmu_enable_percpu_pmuirq(unsigned int irq)
+{
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
+static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu,
+				   void __percpu *devid)
+{
+	if (armpmu_count_irq_users(irq) == 1)
+		free_percpu_irq(irq, devid);
+}
+
+static const struct pmu_irq_ops percpu_pmuirq_ops = {
+	.enable_pmuirq = armpmu_enable_percpu_pmuirq,
+	.disable_pmuirq = disable_percpu_irq,
+	.free_pmuirq = armpmu_free_percpu_pmuirq
+};
+
+static void armpmu_enable_percpu_pmunmi(unsigned int irq)
+{
+	if (!prepare_percpu_nmi(irq))
+		enable_percpu_nmi(irq, IRQ_TYPE_NONE);
+}
+
+static void armpmu_disable_percpu_pmunmi(unsigned int irq)
+{
+	disable_percpu_nmi(irq);
+	teardown_percpu_nmi(irq);
+}
+
+static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu,
+				      void __percpu *devid)
+{
+	if (armpmu_count_irq_users(irq) == 1)
+		free_percpu_nmi(irq, devid);
+}
+
+static const struct pmu_irq_ops percpu_pmunmi_ops = {
+	.enable_pmuirq = armpmu_enable_percpu_pmunmi,
+	.disable_pmuirq = armpmu_disable_percpu_pmunmi,
+	.free_pmuirq = armpmu_free_percpu_pmunmi
+};
+
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
+static DEFINE_PER_CPU(int, cpu_irq);
+static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops);
+
+static bool has_nmi;
+
+static inline u64 arm_pmu_event_max_period(struct perf_event *event)
+{
+	if (event->hw.flags & ARMPMU_EVT_64BIT)
+		return GENMASK_ULL(63, 0);
+	else if (event->hw.flags & ARMPMU_EVT_63BIT)
+		return GENMASK_ULL(62, 0);
+	else if (event->hw.flags & ARMPMU_EVT_47BIT)
+		return GENMASK_ULL(46, 0);
+	else
+		return GENMASK_ULL(31, 0);
+}
+
+static int
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result, ret;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	if (!cache_map)
+		return -ENOENT;
+
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	return ret;
+}
+
+static int
+armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
+{
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	if (!event_map)
+		return -ENOENT;
+
+	mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+	return (int)(config & raw_event_mask);
+}
+
+int
+armpmu_map_event(struct perf_event *event,
+		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+		 const unsigned (*cache_map)
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX],
+		 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+	int type = event->attr.type;
+
+	if (type == event->pmu->type)
+		return armpmu_map_raw_event(raw_event_mask, config);
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_hw_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int armpmu_event_set_period(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	u64 max_period;
+	int ret = 0;
+
+	max_period = arm_pmu_event_max_period(event);
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (max_period >> 1))
+		left = (max_period >> 1);
+
+	local64_set(&hwc->prev_count, (u64)-left);
+
+	armpmu->write_counter(event, (u64)(-left) & max_period);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+u64 armpmu_event_update(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+	u64 max_period = arm_pmu_event_max_period(event);
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = armpmu->read_counter(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count - prev_raw_count) & max_period;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static void
+armpmu_read(struct perf_event *event)
+{
+	armpmu_event_update(event);
+}
+
+static void
+armpmu_stop(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(event);
+		armpmu_event_update(event);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static void armpmu_start(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	/*
+	 * Set the period again. Some counters can't be stopped, so when we
+	 * were stopped we simply disabled the IRQ source and the counter
+	 * may have been left counting. If we don't do this step then we may
+	 * get an interrupt too soon or *way* too late if the overflow has
+	 * happened since disabling.
+	 */
+	armpmu_event_set_period(event);
+	armpmu->enable(event);
+}
+
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	armpmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	armpmu->clear_event_idx(hw_events, event);
+	perf_event_update_userpage(event);
+	/* Clear the allocated counter */
+	hwc->idx = -1;
+}
+
+static int
+armpmu_add(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	/* An event following a process won't be stopped earlier */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return -ENOENT;
+
+	/* If we don't have a space for the counter then finish early. */
+	idx = armpmu->get_event_idx(hw_events, event);
+	if (idx < 0)
+		return idx;
+
+	/*
+	 * If there is an event in the counter we are going to use then make
+	 * sure it is disabled.
+	 */
+	event->hw.idx = idx;
+	armpmu->disable(event);
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+			       struct perf_event *event)
+{
+	struct arm_pmu *armpmu;
+
+	if (is_software_event(event))
+		return 1;
+
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	armpmu = to_arm_pmu(event->pmu);
+	return armpmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int
+validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct pmu_hw_events fake_pmu;
+
+	/*
+	 * Initialise the fake PMU. We only need to populate the
+	 * used_mask for the purposes of validation.
+	 */
+	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
+
+	if (!validate_event(event->pmu, &fake_pmu, leader))
+		return -EINVAL;
+
+	if (event == leader)
+		return 0;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
+			return -EINVAL;
+	}
+
+	if (!validate_event(event->pmu, &fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
+{
+	struct arm_pmu *armpmu;
+	int ret;
+	u64 start_clock, finish_clock;
+
+	/*
+	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+	 * do any necessary shifting, we just need to perform the first
+	 * dereference.
+	 */
+	armpmu = *(void **)dev;
+	if (WARN_ON_ONCE(!armpmu))
+		return IRQ_NONE;
+
+	start_clock = sched_clock();
+	ret = armpmu->handle_irq(armpmu);
+	finish_clock = sched_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+	return ret;
+}
+
+static int
+__hw_perf_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
+
+	hwc->flags = 0;
+	mapping = armpmu->map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if (armpmu->set_event_filter &&
+	    armpmu->set_event_filter(hwc, &event->attr)) {
+		pr_debug("ARM performance counters do not support "
+			 "mode exclusion\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base	    |= (unsigned long)mapping;
+
+	if (!is_sampling_event(event)) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period  = arm_pmu_event_max_period(event) >> 1;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	return validate_group(event);
+}
+
+static int armpmu_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+
+	/*
+	 * Reject CPU-affine events for CPUs that are of a different class to
+	 * that which this PMU handles. Process-following events (where
+	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
+	 * reject them later (in armpmu_add) if they're scheduled on a
+	 * different class of CPU.
+	 */
+	if (event->cpu != -1 &&
+		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
+		return -ENOENT;
+
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	return __hw_perf_event_init(event);
+}
+
+static void armpmu_enable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
+	if (enabled)
+		armpmu->start(armpmu);
+}
+
+static void armpmu_disable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
+	armpmu->stop(armpmu);
+}
+
+/*
+ * In heterogeneous systems, events are specific to a particular
+ * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
+ * the same microarchitecture.
+ */
+static bool armpmu_filter(struct pmu *pmu, int cpu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	return !cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+}
+
+static ssize_t cpus_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
+	return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
+}
+
+static DEVICE_ATTR_RO(cpus);
+
+static struct attribute *armpmu_common_attrs[] = {
+	&dev_attr_cpus.attr,
+	NULL,
+};
+
+static const struct attribute_group armpmu_common_attr_group = {
+	.attrs = armpmu_common_attrs,
+};
+
+static int armpmu_count_irq_users(const int irq)
+{
+	int cpu, count = 0;
+
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(cpu_irq, cpu) == irq)
+			count++;
+	}
+
+	return count;
+}
+
+static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq)
+{
+	const struct pmu_irq_ops *ops = NULL;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(cpu_irq, cpu) != irq)
+			continue;
+
+		ops = per_cpu(cpu_irq_ops, cpu);
+		if (ops)
+			break;
+	}
+
+	return ops;
+}
+
+void armpmu_free_irq(int irq, int cpu)
+{
+	if (per_cpu(cpu_irq, cpu) == 0)
+		return;
+	if (WARN_ON(irq != per_cpu(cpu_irq, cpu)))
+		return;
+
+	per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu);
+
+	per_cpu(cpu_irq, cpu) = 0;
+	per_cpu(cpu_irq_ops, cpu) = NULL;
+}
+
+int armpmu_request_irq(int irq, int cpu)
+{
+	int err = 0;
+	const irq_handler_t handler = armpmu_dispatch_irq;
+	const struct pmu_irq_ops *irq_ops;
+
+	if (!irq)
+		return 0;
+
+	if (!irq_is_percpu_devid(irq)) {
+		unsigned long irq_flags;
+
+		err = irq_force_affinity(irq, cpumask_of(cpu));
+
+		if (err && num_possible_cpus() > 1) {
+			pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				irq, cpu);
+			goto err_out;
+		}
+
+		irq_flags = IRQF_PERCPU |
+			    IRQF_NOBALANCING | IRQF_NO_AUTOEN |
+			    IRQF_NO_THREAD;
+
+		err = request_nmi(irq, handler, irq_flags, "arm-pmu",
+				  per_cpu_ptr(&cpu_armpmu, cpu));
+
+		/* If cannot get an NMI, get a normal interrupt */
+		if (err) {
+			err = request_irq(irq, handler, irq_flags, "arm-pmu",
+					  per_cpu_ptr(&cpu_armpmu, cpu));
+			irq_ops = &pmuirq_ops;
+		} else {
+			has_nmi = true;
+			irq_ops = &pmunmi_ops;
+		}
+	} else if (armpmu_count_irq_users(irq) == 0) {
+		err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu);
+
+		/* If cannot get an NMI, get a normal interrupt */
+		if (err) {
+			err = request_percpu_irq(irq, handler, "arm-pmu",
+						 &cpu_armpmu);
+			irq_ops = &percpu_pmuirq_ops;
+		} else {
+			has_nmi = true;
+			irq_ops = &percpu_pmunmi_ops;
+		}
+	} else {
+		/* Per cpudevid irq was already requested by another CPU */
+		irq_ops = armpmu_find_irq_ops(irq);
+
+		if (WARN_ON(!irq_ops))
+			err = -EINVAL;
+	}
+
+	if (err)
+		goto err_out;
+
+	per_cpu(cpu_irq, cpu) = irq;
+	per_cpu(cpu_irq_ops, cpu) = irq_ops;
+	return 0;
+
+err_out:
+	pr_err("unable to request IRQ%d for ARM PMU counters\n", irq);
+	return err;
+}
+
+static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
+{
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+	return per_cpu(hw_events->irq, cpu);
+}
+
+bool arm_pmu_irq_is_nmi(void)
+{
+	return has_nmi;
+}
+
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */
+static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
+	int irq;
+
+	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		return 0;
+	if (pmu->reset)
+		pmu->reset(pmu);
+
+	per_cpu(cpu_armpmu, cpu) = pmu;
+
+	irq = armpmu_get_cpu_irq(pmu, cpu);
+	if (irq)
+		per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq);
+
+	return 0;
+}
+
+static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
+	int irq;
+
+	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		return 0;
+
+	irq = armpmu_get_cpu_irq(pmu, cpu);
+	if (irq)
+		per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq);
+
+	per_cpu(cpu_armpmu, cpu) = NULL;
+
+	return 0;
+}
+
+#ifdef CONFIG_CPU_PM
+static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
+{
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct perf_event *event;
+	int idx;
+
+	for (idx = 0; idx < armpmu->num_events; idx++) {
+		event = hw_events->events[idx];
+		if (!event)
+			continue;
+
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			/*
+			 * Stop and update the counter
+			 */
+			armpmu_stop(event, PERF_EF_UPDATE);
+			break;
+		case CPU_PM_EXIT:
+		case CPU_PM_ENTER_FAILED:
+			 /*
+			  * Restore and enable the counter.
+			  */
+			armpmu_start(event, PERF_EF_RELOAD);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
+			     void *v)
+{
+	struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events);
+
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return NOTIFY_DONE;
+
+	/*
+	 * Always reset the PMU registers on power-up even if
+	 * there are no events running.
+	 */
+	if (cmd == CPU_PM_EXIT && armpmu->reset)
+		armpmu->reset(armpmu);
+
+	if (!enabled)
+		return NOTIFY_OK;
+
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		armpmu->stop(armpmu);
+		cpu_pm_pmu_setup(armpmu, cmd);
+		break;
+	case CPU_PM_EXIT:
+	case CPU_PM_ENTER_FAILED:
+		cpu_pm_pmu_setup(armpmu, cmd);
+		armpmu->start(armpmu);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
+	return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
+}
+
+static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
+{
+	cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
+}
+#else
+static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
+static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
+#endif
+
+static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	int err;
+
+	err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_STARTING,
+				       &cpu_pmu->node);
+	if (err)
+		goto out;
+
+	err = cpu_pm_pmu_register(cpu_pmu);
+	if (err)
+		goto out_unregister;
+
+	return 0;
+
+out_unregister:
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+					    &cpu_pmu->node);
+out:
+	return err;
+}
+
+static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
+{
+	cpu_pm_pmu_unregister(cpu_pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+					    &cpu_pmu->node);
+}
+
+struct arm_pmu *armpmu_alloc(void)
+{
+	struct arm_pmu *pmu;
+	int cpu;
+
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		goto out;
+
+	pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL);
+	if (!pmu->hw_events) {
+		pr_info("failed to allocate per-cpu PMU data.\n");
+		goto out_free_pmu;
+	}
+
+	pmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+		.filter		= armpmu_filter,
+		.attr_groups	= pmu->attr_groups,
+		/*
+		 * This is a CPU PMU potentially in a heterogeneous
+		 * configuration (e.g. big.LITTLE) so
+		 * PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open
+		 * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
+		 * specific PMU.
+		 */
+		.capabilities	= PERF_PMU_CAP_EXTENDED_REGS |
+				  PERF_PMU_CAP_EXTENDED_HW_TYPE,
+	};
+
+	pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
+		&armpmu_common_attr_group;
+
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events;
+
+		events = per_cpu_ptr(pmu->hw_events, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+		events->percpu_pmu = pmu;
+	}
+
+	return pmu;
+
+out_free_pmu:
+	kfree(pmu);
+out:
+	return NULL;
+}
+
+void armpmu_free(struct arm_pmu *pmu)
+{
+	free_percpu(pmu->hw_events);
+	kfree(pmu);
+}
+
+int armpmu_register(struct arm_pmu *pmu)
+{
+	int ret;
+
+	ret = cpu_pmu_init(pmu);
+	if (ret)
+		return ret;
+
+	if (!pmu->set_event_filter)
+		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
+
+	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+	if (ret)
+		goto out_destroy;
+
+	pr_info("enabled with %s PMU driver, %d counters available%s\n",
+		pmu->name, pmu->num_events,
+		has_nmi ? ", using NMIs" : "");
+
+	kvm_host_pmu_init(pmu);
+
+	return 0;
+
+out_destroy:
+	cpu_pmu_destroy(pmu);
+	return ret;
+}
+
+static int arm_pmu_hp_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
+				      "perf/arm/pmu:starting",
+				      arm_perf_starting_cpu,
+				      arm_perf_teardown_cpu);
+	if (ret)
+		pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
+		       ret);
+	return ret;
+}
+subsys_initcall(arm_pmu_hp_init);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
new file mode 100644
index 0000000000..05dda19c53
--- /dev/null
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ACPI probing code for ARM performance counters.
+ *
+ * Copyright (C) 2017 ARM Ltd.
+ */
+
+#include <linux/acpi.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/percpu.h>
+#include <linux/perf/arm_pmu.h>
+
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+
+static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
+static DEFINE_PER_CPU(int, pmu_irqs);
+
+static int arm_pmu_acpi_register_irq(int cpu)
+{
+	struct acpi_madt_generic_interrupt *gicc;
+	int gsi, trigger;
+
+	gicc = acpi_cpu_get_madt_gicc(cpu);
+
+	gsi = gicc->performance_interrupt;
+
+	/*
+	 * Per the ACPI spec, the MADT cannot describe a PMU that doesn't
+	 * have an interrupt. QEMU advertises this by using a GSI of zero,
+	 * which is not known to be valid on any hardware despite being
+	 * valid per the spec. Take the pragmatic approach and reject a
+	 * GSI of zero for now.
+	 */
+	if (!gsi)
+		return 0;
+
+	if (gicc->flags & ACPI_MADT_PERFORMANCE_IRQ_MODE)
+		trigger = ACPI_EDGE_SENSITIVE;
+	else
+		trigger = ACPI_LEVEL_SENSITIVE;
+
+	/*
+	 * Helpfully, the MADT GICC doesn't have a polarity flag for the
+	 * "performance interrupt". Luckily, on compliant GICs the polarity is
+	 * a fixed value in HW (for both SPIs and PPIs) that we cannot change
+	 * from SW.
+	 *
+	 * Here we pass in ACPI_ACTIVE_HIGH to keep the core code happy. This
+	 * may not match the real polarity, but that should not matter.
+	 *
+	 * Other interrupt controllers are not supported with ACPI.
+	 */
+	return acpi_register_gsi(NULL, gsi, trigger, ACPI_ACTIVE_HIGH);
+}
+
+static void arm_pmu_acpi_unregister_irq(int cpu)
+{
+	struct acpi_madt_generic_interrupt *gicc;
+	int gsi;
+
+	gicc = acpi_cpu_get_madt_gicc(cpu);
+
+	gsi = gicc->performance_interrupt;
+	if (gsi)
+		acpi_unregister_gsi(gsi);
+}
+
+static int __maybe_unused
+arm_acpi_register_pmu_device(struct platform_device *pdev, u8 len,
+			     u16 (*parse_gsi)(struct acpi_madt_generic_interrupt *))
+{
+	int cpu, this_hetid, hetid, irq, ret;
+	u16 this_gsi = 0, gsi = 0;
+
+	/*
+	 * Ensure that platform device must have IORESOURCE_IRQ
+	 * resource to hold gsi interrupt.
+	 */
+	if (pdev->num_resources != 1)
+		return -ENXIO;
+
+	if (pdev->resource[0].flags != IORESOURCE_IRQ)
+		return -ENXIO;
+
+	/*
+	 * Sanity check all the GICC tables for the same interrupt
+	 * number. For now, only support homogeneous ACPI machines.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct acpi_madt_generic_interrupt *gicc;
+
+		gicc = acpi_cpu_get_madt_gicc(cpu);
+		if (gicc->header.length < len)
+			return gsi ? -ENXIO : 0;
+
+		this_gsi = parse_gsi(gicc);
+		this_hetid = find_acpi_cpu_topology_hetero_id(cpu);
+		if (!gsi) {
+			hetid = this_hetid;
+			gsi = this_gsi;
+		} else if (hetid != this_hetid || gsi != this_gsi) {
+			pr_warn("ACPI: %s: must be homogeneous\n", pdev->name);
+			return -ENXIO;
+		}
+	}
+
+	if (!this_gsi)
+		return 0;
+
+	irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
+	if (irq < 0) {
+		pr_warn("ACPI: %s Unable to register interrupt: %d\n", pdev->name, gsi);
+		return -ENXIO;
+	}
+
+	pdev->resource[0].start = irq;
+	ret = platform_device_register(pdev);
+	if (ret)
+		acpi_unregister_gsi(gsi);
+
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
+static struct resource spe_resources[] = {
+	{
+		/* irq */
+		.flags          = IORESOURCE_IRQ,
+	}
+};
+
+static struct platform_device spe_dev = {
+	.name = ARMV8_SPE_PDEV_NAME,
+	.id = -1,
+	.resource = spe_resources,
+	.num_resources = ARRAY_SIZE(spe_resources)
+};
+
+static u16 arm_spe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+	return gicc->spe_interrupt;
+}
+
+/*
+ * For lack of a better place, hook the normal PMU MADT walk
+ * and create a SPE device if we detect a recent MADT with
+ * a homogeneous PPI mapping.
+ */
+static void arm_spe_acpi_register_device(void)
+{
+	int ret = arm_acpi_register_pmu_device(&spe_dev, ACPI_MADT_GICC_SPE,
+					       arm_spe_parse_gsi);
+	if (ret)
+		pr_warn("ACPI: SPE: Unable to register device\n");
+}
+#else
+static inline void arm_spe_acpi_register_device(void)
+{
+}
+#endif /* CONFIG_ARM_SPE_PMU */
+
+#if IS_ENABLED(CONFIG_CORESIGHT_TRBE)
+static struct resource trbe_resources[] = {
+	{
+		/* irq */
+		.flags          = IORESOURCE_IRQ,
+	}
+};
+
+static struct platform_device trbe_dev = {
+	.name = ARMV8_TRBE_PDEV_NAME,
+	.id = -1,
+	.resource = trbe_resources,
+	.num_resources = ARRAY_SIZE(trbe_resources)
+};
+
+static u16 arm_trbe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+	return gicc->trbe_interrupt;
+}
+
+static void arm_trbe_acpi_register_device(void)
+{
+	int ret = arm_acpi_register_pmu_device(&trbe_dev, ACPI_MADT_GICC_TRBE,
+					       arm_trbe_parse_gsi);
+	if (ret)
+		pr_warn("ACPI: TRBE: Unable to register device\n");
+}
+#else
+static inline void arm_trbe_acpi_register_device(void)
+{
+
+}
+#endif /* CONFIG_CORESIGHT_TRBE */
+
+static int arm_pmu_acpi_parse_irqs(void)
+{
+	int irq, cpu, irq_cpu, err;
+
+	for_each_possible_cpu(cpu) {
+		irq = arm_pmu_acpi_register_irq(cpu);
+		if (irq < 0) {
+			err = irq;
+			pr_warn("Unable to parse ACPI PMU IRQ for CPU%d: %d\n",
+				cpu, err);
+			goto out_err;
+		} else if (irq == 0) {
+			pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu);
+		}
+
+		/*
+		 * Log and request the IRQ so the core arm_pmu code can manage
+		 * it. We'll have to sanity-check IRQs later when we associate
+		 * them with their PMUs.
+		 */
+		per_cpu(pmu_irqs, cpu) = irq;
+		err = armpmu_request_irq(irq, cpu);
+		if (err)
+			goto out_err;
+	}
+
+	return 0;
+
+out_err:
+	for_each_possible_cpu(cpu) {
+		irq = per_cpu(pmu_irqs, cpu);
+		if (!irq)
+			continue;
+
+		arm_pmu_acpi_unregister_irq(cpu);
+
+		/*
+		 * Blat all copies of the IRQ so that we only unregister the
+		 * corresponding GSI once (e.g. when we have PPIs).
+		 */
+		for_each_possible_cpu(irq_cpu) {
+			if (per_cpu(pmu_irqs, irq_cpu) == irq)
+				per_cpu(pmu_irqs, irq_cpu) = 0;
+		}
+	}
+
+	return err;
+}
+
+static struct arm_pmu *arm_pmu_acpi_find_pmu(void)
+{
+	unsigned long cpuid = read_cpuid_id();
+	struct arm_pmu *pmu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		pmu = per_cpu(probed_pmus, cpu);
+		if (!pmu || pmu->acpi_cpuid != cpuid)
+			continue;
+
+		return pmu;
+	}
+
+	return NULL;
+}
+
+/*
+ * Check whether the new IRQ is compatible with those already associated with
+ * the PMU (e.g. we don't have mismatched PPIs).
+ */
+static bool pmu_irq_matches(struct arm_pmu *pmu, int irq)
+{
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+	int cpu;
+
+	if (!irq)
+		return true;
+
+	for_each_cpu(cpu, &pmu->supported_cpus) {
+		int other_irq = per_cpu(hw_events->irq, cpu);
+		if (!other_irq)
+			continue;
+
+		if (irq == other_irq)
+			continue;
+		if (!irq_is_percpu_devid(irq) && !irq_is_percpu_devid(other_irq))
+			continue;
+
+		pr_warn("mismatched PPIs detected\n");
+		return false;
+	}
+
+	return true;
+}
+
+static void arm_pmu_acpi_associate_pmu_cpu(struct arm_pmu *pmu,
+					   unsigned int cpu)
+{
+	int irq = per_cpu(pmu_irqs, cpu);
+
+	per_cpu(probed_pmus, cpu) = pmu;
+
+	if (pmu_irq_matches(pmu, irq)) {
+		struct pmu_hw_events __percpu *hw_events;
+		hw_events = pmu->hw_events;
+		per_cpu(hw_events->irq, cpu) = irq;
+	}
+
+	cpumask_set_cpu(cpu, &pmu->supported_cpus);
+}
+
+/*
+ * This must run before the common arm_pmu hotplug logic, so that we can
+ * associate a CPU and its interrupt before the common code tries to manage the
+ * affinity and so on.
+ *
+ * Note that hotplug events are serialized, so we cannot race with another CPU
+ * coming up. The perf core won't open events while a hotplug event is in
+ * progress.
+ */
+static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
+{
+	struct arm_pmu *pmu;
+
+	/* If we've already probed this CPU, we have nothing to do */
+	if (per_cpu(probed_pmus, cpu))
+		return 0;
+
+	pmu = arm_pmu_acpi_find_pmu();
+	if (!pmu) {
+		pr_warn_ratelimited("Unable to associate CPU%d with a PMU\n",
+				    cpu);
+		return 0;
+	}
+
+	arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
+	return 0;
+}
+
+static void arm_pmu_acpi_probe_matching_cpus(struct arm_pmu *pmu,
+					     unsigned long cpuid)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		unsigned long cpu_cpuid = per_cpu(cpu_data, cpu).reg_midr;
+
+		if (cpu_cpuid == cpuid)
+			arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
+	}
+}
+
+int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
+{
+	int pmu_idx = 0;
+	unsigned int cpu;
+	int ret;
+
+	ret = arm_pmu_acpi_parse_irqs();
+	if (ret)
+		return ret;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_ACPI_STARTING,
+					"perf/arm/pmu_acpi:starting",
+					arm_pmu_acpi_cpu_starting, NULL);
+	if (ret)
+		return ret;
+
+	/*
+	 * Initialise and register the set of PMUs which we know about right
+	 * now. Ideally we'd do this in arm_pmu_acpi_cpu_starting() so that we
+	 * could handle late hotplug, but this may lead to deadlock since we
+	 * might try to register a hotplug notifier instance from within a
+	 * hotplug notifier.
+	 *
+	 * There's also the problem of having access to the right init_fn,
+	 * without tying this too deeply into the "real" PMU driver.
+	 *
+	 * For the moment, as with the platform/DT case, we need at least one
+	 * of a PMU's CPUs to be online at probe time.
+	 */
+	for_each_online_cpu(cpu) {
+		struct arm_pmu *pmu = per_cpu(probed_pmus, cpu);
+		unsigned long cpuid;
+		char *base_name;
+
+		/* If we've already probed this CPU, we have nothing to do */
+		if (pmu)
+			continue;
+
+		pmu = armpmu_alloc();
+		if (!pmu) {
+			pr_warn("Unable to allocate PMU for CPU%d\n",
+				cpu);
+			return -ENOMEM;
+		}
+
+		cpuid = per_cpu(cpu_data, cpu).reg_midr;
+		pmu->acpi_cpuid = cpuid;
+
+		arm_pmu_acpi_probe_matching_cpus(pmu, cpuid);
+
+		ret = init_fn(pmu);
+		if (ret == -ENODEV) {
+			/* PMU not handled by this driver, or not present */
+			continue;
+		} else if (ret) {
+			pr_warn("Unable to initialise PMU for CPU%d\n", cpu);
+			return ret;
+		}
+
+		base_name = pmu->name;
+		pmu->name = kasprintf(GFP_KERNEL, "%s_%d", base_name, pmu_idx++);
+		if (!pmu->name) {
+			pr_warn("Unable to allocate PMU name for CPU%d\n", cpu);
+			return -ENOMEM;
+		}
+
+		ret = armpmu_register(pmu);
+		if (ret) {
+			pr_warn("Failed to register PMU for CPU%d\n", cpu);
+			kfree(pmu->name);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static int arm_pmu_acpi_init(void)
+{
+	if (acpi_disabled)
+		return 0;
+
+	arm_spe_acpi_register_device();
+	arm_trbe_acpi_register_device();
+
+	return 0;
+}
+subsys_initcall(arm_pmu_acpi_init)
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
new file mode 100644
index 0000000000..3596db36cb
--- /dev/null
+++ b/drivers/perf/arm_pmu_platform.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * platform_device probing code for ARM performance counters.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+#define dev_fmt pr_fmt
+
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/kconfig.h>
+#include <linux/of.h>
+#include <linux/percpu.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+static int probe_current_pmu(struct arm_pmu *pmu,
+			     const struct pmu_probe_info *info)
+{
+	int cpu = get_cpu();
+	unsigned int cpuid = read_cpuid_id();
+	int ret = -ENODEV;
+
+	pr_info("probing PMU on CPU %d\n", cpu);
+
+	for (; info->init != NULL; info++) {
+		if ((cpuid & info->mask) != info->cpuid)
+			continue;
+		ret = info->init(pmu);
+		break;
+	}
+
+	put_cpu();
+	return ret;
+}
+
+static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq)
+{
+	int cpu, ret;
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+
+	ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
+	if (ret)
+		return ret;
+
+	for_each_cpu(cpu, &pmu->supported_cpus)
+		per_cpu(hw_events->irq, cpu) = irq;
+
+	return 0;
+}
+
+static bool pmu_has_irq_affinity(struct device_node *node)
+{
+	return !!of_find_property(node, "interrupt-affinity", NULL);
+}
+
+static int pmu_parse_irq_affinity(struct device *dev, int i)
+{
+	struct device_node *dn;
+	int cpu;
+
+	/*
+	 * If we don't have an interrupt-affinity property, we guess irq
+	 * affinity matches our logical CPU order, as we used to assume.
+	 * This is fragile, so we'll warn in pmu_parse_irqs().
+	 */
+	if (!pmu_has_irq_affinity(dev->of_node))
+		return i;
+
+	dn = of_parse_phandle(dev->of_node, "interrupt-affinity", i);
+	if (!dn) {
+		dev_warn(dev, "failed to parse interrupt-affinity[%d]\n", i);
+		return -EINVAL;
+	}
+
+	cpu = of_cpu_node_to_id(dn);
+	if (cpu < 0) {
+		dev_warn(dev, "failed to find logical CPU for %pOFn\n", dn);
+		cpu = nr_cpu_ids;
+	}
+
+	of_node_put(dn);
+
+	return cpu;
+}
+
+static int pmu_parse_irqs(struct arm_pmu *pmu)
+{
+	int i = 0, num_irqs;
+	struct platform_device *pdev = pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+	struct device *dev = &pdev->dev;
+
+	num_irqs = platform_irq_count(pdev);
+	if (num_irqs < 0)
+		return dev_err_probe(dev, num_irqs, "unable to count PMU IRQs\n");
+
+	/*
+	 * In this case we have no idea which CPUs are covered by the PMU.
+	 * To match our prior behaviour, we assume all CPUs in this case.
+	 */
+	if (num_irqs == 0) {
+		dev_warn(dev, "no irqs for PMU, sampling events not supported\n");
+		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+		cpumask_setall(&pmu->supported_cpus);
+		return 0;
+	}
+
+	if (num_irqs == 1) {
+		int irq = platform_get_irq(pdev, 0);
+		if ((irq > 0) && irq_is_percpu_devid(irq))
+			return pmu_parse_percpu_irq(pmu, irq);
+	}
+
+	if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(dev->of_node))
+		dev_warn(dev, "no interrupt-affinity property, guessing.\n");
+
+	for (i = 0; i < num_irqs; i++) {
+		int cpu, irq;
+
+		irq = platform_get_irq(pdev, i);
+		if (WARN_ON(irq <= 0))
+			continue;
+
+		if (irq_is_percpu_devid(irq)) {
+			dev_warn(dev, "multiple PPIs or mismatched SPI/PPI detected\n");
+			return -EINVAL;
+		}
+
+		cpu = pmu_parse_irq_affinity(dev, i);
+		if (cpu < 0)
+			return cpu;
+		if (cpu >= nr_cpu_ids)
+			continue;
+
+		if (per_cpu(hw_events->irq, cpu)) {
+			dev_warn(dev, "multiple PMU IRQs for the same CPU detected\n");
+			return -EINVAL;
+		}
+
+		per_cpu(hw_events->irq, cpu) = irq;
+		cpumask_set_cpu(cpu, &pmu->supported_cpus);
+	}
+
+	return 0;
+}
+
+static int armpmu_request_irqs(struct arm_pmu *armpmu)
+{
+	struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+	int cpu, err = 0;
+
+	for_each_cpu(cpu, &armpmu->supported_cpus) {
+		int irq = per_cpu(hw_events->irq, cpu);
+		if (!irq)
+			continue;
+
+		err = armpmu_request_irq(irq, cpu);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static void armpmu_free_irqs(struct arm_pmu *armpmu)
+{
+	int cpu;
+	struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+
+	for_each_cpu(cpu, &armpmu->supported_cpus) {
+		int irq = per_cpu(hw_events->irq, cpu);
+
+		armpmu_free_irq(irq, cpu);
+	}
+}
+
+int arm_pmu_device_probe(struct platform_device *pdev,
+			 const struct of_device_id *of_table,
+			 const struct pmu_probe_info *probe_table)
+{
+	armpmu_init_fn init_fn;
+	struct device *dev = &pdev->dev;
+	struct arm_pmu *pmu;
+	int ret = -ENODEV;
+
+	pmu = armpmu_alloc();
+	if (!pmu)
+		return -ENOMEM;
+
+	pmu->plat_device = pdev;
+
+	ret = pmu_parse_irqs(pmu);
+	if (ret)
+		goto out_free;
+
+	init_fn = of_device_get_match_data(dev);
+	if (init_fn) {
+		pmu->secure_access = of_property_read_bool(dev->of_node,
+							   "secure-reg-access");
+
+		/* arm64 systems boot only as non-secure */
+		if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
+			dev_warn(dev, "ignoring \"secure-reg-access\" property for arm64\n");
+			pmu->secure_access = false;
+		}
+
+		ret = init_fn(pmu);
+	} else if (probe_table) {
+		cpumask_setall(&pmu->supported_cpus);
+		ret = probe_current_pmu(pmu, probe_table);
+	}
+
+	if (ret) {
+		dev_err(dev, "failed to probe PMU!\n");
+		goto out_free;
+	}
+
+	ret = armpmu_request_irqs(pmu);
+	if (ret)
+		goto out_free_irqs;
+
+	ret = armpmu_register(pmu);
+	if (ret) {
+		dev_err(dev, "failed to register PMU devices!\n");
+		goto out_free_irqs;
+	}
+
+	return 0;
+
+out_free_irqs:
+	armpmu_free_irqs(pmu);
+out_free:
+	armpmu_free(pmu);
+	return ret;
+}
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
new file mode 100644
index 0000000000..d681638ec6
--- /dev/null
+++ b/drivers/perf/arm_pmuv3.c
@@ -0,0 +1,1431 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ARMv8 PMUv3 Performance Events handling code.
+ *
+ * Copyright (C) 2012 ARM Limited
+ * Author: Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based heavily on the ARMv7 perf event code.
+ */
+
+#include <asm/irq_regs.h>
+#include <asm/perf_event.h>
+#include <asm/virt.h>
+
+#include <clocksource/arm_arch_timer.h>
+
+#include <linux/acpi.h>
+#include <linux/clocksource.h>
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/perf/arm_pmuv3.h>
+#include <linux/platform_device.h>
+#include <linux/sched_clock.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+
+#include <asm/arm_pmuv3.h>
+
+/* ARMv8 Cortex-A53 specific event types. */
+#define ARMV8_A53_PERFCTR_PREF_LINEFILL				0xC2
+
+/* ARMv8 Cavium ThunderX specific event types. */
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST			0xE9
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS		0xEA
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS		0xEB
+#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS		0xEC
+#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS		0xED
+
+/*
+ * ARMv8 Architectural defined events, not all of these may
+ * be supported on any given implementation. Unsupported events will
+ * be disabled at run-time based on the PMCEID registers.
+ */
+static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
+};
+
+static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+						[PERF_COUNT_HW_CACHE_OP_MAX]
+						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_TLB,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
+	[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB,
+
+	[C(LL)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD,
+	[C(LL)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_LL_CACHE_RD,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+};
+
+static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL,
+
+	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
+
+static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
+
+	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
+
+static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+};
+
+static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+						   [PERF_COUNT_HW_CACHE_OP_MAX]
+						   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS,
+
+	[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS,
+	[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
+};
+
+static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
+
+	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
+
+static ssize_t
+armv8pmu_events_sysfs_show(struct device *dev,
+			   struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+}
+
+#define ARMV8_EVENT_ATTR(name, config)						\
+	PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config)
+
+static struct attribute *armv8_pmuv3_event_attrs[] = {
+	ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+	ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
+	ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE),
+	ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL),
+	ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED),
+	ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED),
+	ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED),
+	ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN),
+	ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN),
+	ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED),
+	ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED),
+	ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED),
+	ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED),
+	ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED),
+	ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED),
+	ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES),
+	ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED),
+	ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS),
+	ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE),
+	ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB),
+	ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE),
+	ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB),
+	ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS),
+	ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR),
+	ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC),
+	ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED),
+	ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES),
+	/* Don't expose the chain event in /sys, since it's useless in isolation */
+	ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE),
+	ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE),
+	ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED),
+	ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED),
+	ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND),
+	ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND),
+	ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB),
+	ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB),
+	ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE),
+	ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE),
+	ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL),
+	ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE),
+	ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB),
+	ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL),
+	ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL),
+	ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB),
+	ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB),
+	ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS),
+	ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE),
+	ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS),
+	ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK),
+	ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK),
+	ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
+	ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
+	ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+	ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD),
+	ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED),
+	ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC),
+	ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL),
+	ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND),
+	ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND),
+	ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT),
+	ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
+	ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
+	ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
+	ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
+	ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES),
+	ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM),
+	ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS),
+	ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
+	ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
+	ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+	ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP),
+	ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG),
+	ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0),
+	ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1),
+	ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2),
+	ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3),
+	ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4),
+	ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5),
+	ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6),
+	ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7),
+	ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
+	ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
+	ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
+	ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED),
+	ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD),
+	ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR),
+	NULL,
+};
+
+static umode_t
+armv8pmu_event_attr_is_visible(struct kobject *kobj,
+			       struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+
+	if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
+	    test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
+		return attr->mode;
+
+	if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) {
+		u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
+
+		if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
+		    test_bit(id, cpu_pmu->pmceid_ext_bitmap))
+			return attr->mode;
+	}
+
+	return 0;
+}
+
+static const struct attribute_group armv8_pmuv3_events_attr_group = {
+	.name = "events",
+	.attrs = armv8_pmuv3_event_attrs,
+	.is_visible = armv8pmu_event_attr_is_visible,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-15");
+PMU_FORMAT_ATTR(long, "config1:0");
+PMU_FORMAT_ATTR(rdpmc, "config1:1");
+
+static int sysctl_perf_user_access __read_mostly;
+
+static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
+{
+	return event->attr.config1 & 0x1;
+}
+
+static inline bool armv8pmu_event_want_user_access(struct perf_event *event)
+{
+	return event->attr.config1 & 0x2;
+}
+
+static struct attribute *armv8_pmuv3_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_long.attr,
+	&format_attr_rdpmc.attr,
+	NULL,
+};
+
+static const struct attribute_group armv8_pmuv3_format_attr_group = {
+	.name = "format",
+	.attrs = armv8_pmuv3_format_attrs,
+};
+
+static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
+			  char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+	u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
+
+	return sysfs_emit(page, "0x%08x\n", slots);
+}
+
+static DEVICE_ATTR_RO(slots);
+
+static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr,
+			      char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+	u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT)
+			& ARMV8_PMU_BUS_SLOTS_MASK;
+
+	return sysfs_emit(page, "0x%08x\n", bus_slots);
+}
+
+static DEVICE_ATTR_RO(bus_slots);
+
+static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
+			      char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+	u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT)
+			& ARMV8_PMU_BUS_WIDTH_MASK;
+	u32 val = 0;
+
+	/* Encoded as Log2(number of bytes), plus one */
+	if (bus_width > 2 && bus_width < 13)
+		val = 1 << (bus_width - 1);
+
+	return sysfs_emit(page, "0x%08x\n", val);
+}
+
+static DEVICE_ATTR_RO(bus_width);
+
+static struct attribute *armv8_pmuv3_caps_attrs[] = {
+	&dev_attr_slots.attr,
+	&dev_attr_bus_slots.attr,
+	&dev_attr_bus_width.attr,
+	NULL,
+};
+
+static const struct attribute_group armv8_pmuv3_caps_attr_group = {
+	.name = "caps",
+	.attrs = armv8_pmuv3_caps_attrs,
+};
+
+/*
+ * Perf Events' indices
+ */
+#define	ARMV8_IDX_CYCLE_COUNTER	0
+#define	ARMV8_IDX_COUNTER0	1
+#define	ARMV8_IDX_CYCLE_COUNTER_USER	32
+
+/*
+ * We unconditionally enable ARMv8.5-PMU long event counter support
+ * (64-bit events) where supported. Indicate if this arm_pmu has long
+ * event counter support.
+ *
+ * On AArch32, long counters make no sense (you can't access the top
+ * bits), so we only enable this on AArch64.
+ */
+static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
+{
+	return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver));
+}
+
+static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
+{
+	return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
+/*
+ * We must chain two programmable counters for 64 bit events,
+ * except when we have allocated the 64bit cycle counter (for CPU
+ * cycles event) or when user space counter access is enabled.
+ */
+static inline bool armv8pmu_event_is_chained(struct perf_event *event)
+{
+	int idx = event->hw.idx;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+
+	return !armv8pmu_event_has_user_read(event) &&
+	       armv8pmu_event_is_64bit(event) &&
+	       !armv8pmu_has_long_event(cpu_pmu) &&
+	       (idx != ARMV8_IDX_CYCLE_COUNTER);
+}
+
+/*
+ * ARMv8 low level PMU access
+ */
+
+/*
+ * Perf Event to low level counters mapping
+ */
+#define	ARMV8_IDX_TO_COUNTER(x)	\
+	(((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
+
+static inline u64 armv8pmu_pmcr_read(void)
+{
+	return read_pmcr();
+}
+
+static inline void armv8pmu_pmcr_write(u64 val)
+{
+	val &= ARMV8_PMU_PMCR_MASK;
+	isb();
+	write_pmcr(val);
+}
+
+static inline int armv8pmu_has_overflowed(u32 pmovsr)
+{
+	return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
+}
+
+static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
+{
+	return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
+}
+
+static inline u64 armv8pmu_read_evcntr(int idx)
+{
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+
+	return read_pmevcntrn(counter);
+}
+
+static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
+{
+	int idx = event->hw.idx;
+	u64 val = armv8pmu_read_evcntr(idx);
+
+	if (armv8pmu_event_is_chained(event))
+		val = (val << 32) | armv8pmu_read_evcntr(idx - 1);
+	return val;
+}
+
+/*
+ * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP
+ * is set the event counters also become 64-bit counters. Unless the
+ * user has requested a long counter (attr.config1) then we want to
+ * interrupt upon 32-bit overflow - we achieve this by applying a bias.
+ */
+static bool armv8pmu_event_needs_bias(struct perf_event *event)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (armv8pmu_event_is_64bit(event))
+		return false;
+
+	if (armv8pmu_has_long_event(cpu_pmu) ||
+	    idx == ARMV8_IDX_CYCLE_COUNTER)
+		return true;
+
+	return false;
+}
+
+static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value)
+{
+	if (armv8pmu_event_needs_bias(event))
+		value |= GENMASK_ULL(63, 32);
+
+	return value;
+}
+
+static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value)
+{
+	if (armv8pmu_event_needs_bias(event))
+		value &= ~GENMASK_ULL(63, 32);
+
+	return value;
+}
+
+static u64 armv8pmu_read_counter(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u64 value;
+
+	if (idx == ARMV8_IDX_CYCLE_COUNTER)
+		value = read_pmccntr();
+	else
+		value = armv8pmu_read_hw_counter(event);
+
+	return  armv8pmu_unbias_long_counter(event, value);
+}
+
+static inline void armv8pmu_write_evcntr(int idx, u64 value)
+{
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+
+	write_pmevcntrn(counter, value);
+}
+
+static inline void armv8pmu_write_hw_counter(struct perf_event *event,
+					     u64 value)
+{
+	int idx = event->hw.idx;
+
+	if (armv8pmu_event_is_chained(event)) {
+		armv8pmu_write_evcntr(idx, upper_32_bits(value));
+		armv8pmu_write_evcntr(idx - 1, lower_32_bits(value));
+	} else {
+		armv8pmu_write_evcntr(idx, value);
+	}
+}
+
+static void armv8pmu_write_counter(struct perf_event *event, u64 value)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	value = armv8pmu_bias_long_counter(event, value);
+
+	if (idx == ARMV8_IDX_CYCLE_COUNTER)
+		write_pmccntr(value);
+	else
+		armv8pmu_write_hw_counter(event, value);
+}
+
+static inline void armv8pmu_write_evtype(int idx, u32 val)
+{
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+
+	val &= ARMV8_PMU_EVTYPE_MASK;
+	write_pmevtypern(counter, val);
+}
+
+static inline void armv8pmu_write_event_type(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	/*
+	 * For chained events, the low counter is programmed to count
+	 * the event of interest and the high counter is programmed
+	 * with CHAIN event code with filters set to count at all ELs.
+	 */
+	if (armv8pmu_event_is_chained(event)) {
+		u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN |
+				ARMV8_PMU_INCLUDE_EL2;
+
+		armv8pmu_write_evtype(idx - 1, hwc->config_base);
+		armv8pmu_write_evtype(idx, chain_evt);
+	} else {
+		if (idx == ARMV8_IDX_CYCLE_COUNTER)
+			write_pmccfiltr(hwc->config_base);
+		else
+			armv8pmu_write_evtype(idx, hwc->config_base);
+	}
+}
+
+static u32 armv8pmu_event_cnten_mask(struct perf_event *event)
+{
+	int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+	u32 mask = BIT(counter);
+
+	if (armv8pmu_event_is_chained(event))
+		mask |= BIT(counter - 1);
+	return mask;
+}
+
+static inline void armv8pmu_enable_counter(u32 mask)
+{
+	/*
+	 * Make sure event configuration register writes are visible before we
+	 * enable the counter.
+	 * */
+	isb();
+	write_pmcntenset(mask);
+}
+
+static inline void armv8pmu_enable_event_counter(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	u32 mask = armv8pmu_event_cnten_mask(event);
+
+	kvm_set_pmu_events(mask, attr);
+
+	/* We rely on the hypervisor switch code to enable guest counters */
+	if (!kvm_pmu_counter_deferred(attr))
+		armv8pmu_enable_counter(mask);
+}
+
+static inline void armv8pmu_disable_counter(u32 mask)
+{
+	write_pmcntenclr(mask);
+	/*
+	 * Make sure the effects of disabling the counter are visible before we
+	 * start configuring the event.
+	 */
+	isb();
+}
+
+static inline void armv8pmu_disable_event_counter(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	u32 mask = armv8pmu_event_cnten_mask(event);
+
+	kvm_clr_pmu_events(mask);
+
+	/* We rely on the hypervisor switch code to disable guest counters */
+	if (!kvm_pmu_counter_deferred(attr))
+		armv8pmu_disable_counter(mask);
+}
+
+static inline void armv8pmu_enable_intens(u32 mask)
+{
+	write_pmintenset(mask);
+}
+
+static inline void armv8pmu_enable_event_irq(struct perf_event *event)
+{
+	u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+	armv8pmu_enable_intens(BIT(counter));
+}
+
+static inline void armv8pmu_disable_intens(u32 mask)
+{
+	write_pmintenclr(mask);
+	isb();
+	/* Clear the overflow flag in case an interrupt is pending. */
+	write_pmovsclr(mask);
+	isb();
+}
+
+static inline void armv8pmu_disable_event_irq(struct perf_event *event)
+{
+	u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+	armv8pmu_disable_intens(BIT(counter));
+}
+
+static inline u32 armv8pmu_getreset_flags(void)
+{
+	u32 value;
+
+	/* Read */
+	value = read_pmovsclr();
+
+	/* Write to clear flags */
+	value &= ARMV8_PMU_OVSR_MASK;
+	write_pmovsclr(value);
+
+	return value;
+}
+
+static void update_pmuserenr(u64 val)
+{
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * The current PMUSERENR_EL0 value might be the value for the guest.
+	 * If that's the case, have KVM keep tracking of the register value
+	 * for the host EL0 so that KVM can restore it before returning to
+	 * the host EL0. Otherwise, update the register now.
+	 */
+	if (kvm_set_pmuserenr(val))
+		return;
+
+	write_pmuserenr(val);
+}
+
+static void armv8pmu_disable_user_access(void)
+{
+	update_pmuserenr(0);
+}
+
+static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
+{
+	int i;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+
+	/* Clear any unused counters to avoid leaking their contents */
+	for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) {
+		if (i == ARMV8_IDX_CYCLE_COUNTER)
+			write_pmccntr(0);
+		else
+			armv8pmu_write_evcntr(i, 0);
+	}
+
+	update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
+}
+
+static void armv8pmu_enable_event(struct perf_event *event)
+{
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	armv8pmu_disable_event_counter(event);
+	armv8pmu_write_event_type(event);
+	armv8pmu_enable_event_irq(event);
+	armv8pmu_enable_event_counter(event);
+}
+
+static void armv8pmu_disable_event(struct perf_event *event)
+{
+	armv8pmu_disable_event_counter(event);
+	armv8pmu_disable_event_irq(event);
+}
+
+static void armv8pmu_start(struct arm_pmu *cpu_pmu)
+{
+	struct perf_event_context *ctx;
+	int nr_user = 0;
+
+	ctx = perf_cpu_task_ctx();
+	if (ctx)
+		nr_user = ctx->nr_user;
+
+	if (sysctl_perf_user_access && nr_user)
+		armv8pmu_enable_user_access(cpu_pmu);
+	else
+		armv8pmu_disable_user_access();
+
+	/* Enable all counters */
+	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
+
+	kvm_vcpu_pmu_resync_el0();
+}
+
+static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	/* Disable all counters */
+	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
+}
+
+static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
+{
+	u32 pmovsr;
+	struct perf_sample_data data;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * Get and reset the IRQ flags
+	 */
+	pmovsr = armv8pmu_getreset_flags();
+
+	/*
+	 * Did an overflow occur?
+	 */
+	if (!armv8pmu_has_overflowed(pmovsr))
+		return IRQ_NONE;
+
+	/*
+	 * Handle the counter(s) overflow(s)
+	 */
+	regs = get_irq_regs();
+
+	/*
+	 * Stop the PMU while processing the counter overflows
+	 * to prevent skews in group events.
+	 */
+	armv8pmu_stop(cpu_pmu);
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		/* Ignore if we don't have an event. */
+		if (!event)
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!armpmu_event_set_period(event))
+			continue;
+
+		/*
+		 * Perf event overflow will queue the processing of the event as
+		 * an irq_work which will be taken care of in the handling of
+		 * IPI_IRQ_WORK.
+		 */
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(event);
+	}
+	armv8pmu_start(cpu_pmu);
+
+	return IRQ_HANDLED;
+}
+
+static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
+				    struct arm_pmu *cpu_pmu)
+{
+	int idx;
+
+	for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+	return -EAGAIN;
+}
+
+static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
+				   struct arm_pmu *cpu_pmu)
+{
+	int idx;
+
+	/*
+	 * Chaining requires two consecutive event counters, where
+	 * the lower idx must be even.
+	 */
+	for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) {
+		if (!test_and_set_bit(idx, cpuc->used_mask)) {
+			/* Check if the preceding even counter is available */
+			if (!test_and_set_bit(idx - 1, cpuc->used_mask))
+				return idx;
+			/* Release the Odd counter */
+			clear_bit(idx, cpuc->used_mask);
+		}
+	}
+	return -EAGAIN;
+}
+
+static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				  struct perf_event *event)
+{
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
+
+	/* Always prefer to place a cycle counter into the cycle counter. */
+	if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
+		if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
+			return ARMV8_IDX_CYCLE_COUNTER;
+		else if (armv8pmu_event_is_64bit(event) &&
+			   armv8pmu_event_want_user_access(event) &&
+			   !armv8pmu_has_long_event(cpu_pmu))
+				return -EAGAIN;
+	}
+
+	/*
+	 * Otherwise use events counters
+	 */
+	if (armv8pmu_event_is_chained(event))
+		return	armv8pmu_get_chain_idx(cpuc, cpu_pmu);
+	else
+		return armv8pmu_get_single_idx(cpuc, cpu_pmu);
+}
+
+static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				     struct perf_event *event)
+{
+	int idx = event->hw.idx;
+
+	clear_bit(idx, cpuc->used_mask);
+	if (armv8pmu_event_is_chained(event))
+		clear_bit(idx - 1, cpuc->used_mask);
+}
+
+static int armv8pmu_user_event_idx(struct perf_event *event)
+{
+	if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event))
+		return 0;
+
+	/*
+	 * We remap the cycle counter index to 32 to
+	 * match the offset applied to the rest of
+	 * the counter indices.
+	 */
+	if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER)
+		return ARMV8_IDX_CYCLE_COUNTER_USER;
+
+	return event->hw.idx;
+}
+
+/*
+ * Add an event filter to a given event.
+ */
+static int armv8pmu_set_event_filter(struct hw_perf_event *event,
+				     struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (attr->exclude_idle)
+		return -EPERM;
+
+	/*
+	 * If we're running in hyp mode, then we *are* the hypervisor.
+	 * Therefore we ignore exclude_hv in this configuration, since
+	 * there's no hypervisor to sample anyway. This is consistent
+	 * with other architectures (x86 and Power).
+	 */
+	if (is_kernel_in_hyp_mode()) {
+		if (!attr->exclude_kernel && !attr->exclude_host)
+			config_base |= ARMV8_PMU_INCLUDE_EL2;
+		if (attr->exclude_guest)
+			config_base |= ARMV8_PMU_EXCLUDE_EL1;
+		if (attr->exclude_host)
+			config_base |= ARMV8_PMU_EXCLUDE_EL0;
+	} else {
+		if (!attr->exclude_hv && !attr->exclude_host)
+			config_base |= ARMV8_PMU_INCLUDE_EL2;
+	}
+
+	/*
+	 * Filter out !VHE kernels and guest kernels
+	 */
+	if (attr->exclude_kernel)
+		config_base |= ARMV8_PMU_EXCLUDE_EL1;
+
+	if (attr->exclude_user)
+		config_base |= ARMV8_PMU_EXCLUDE_EL0;
+
+	/*
+	 * Install the filter into config_base as this is used to
+	 * construct the event type.
+	 */
+	event->config_base = config_base;
+
+	return 0;
+}
+
+static void armv8pmu_reset(void *info)
+{
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
+	u64 pmcr;
+
+	/* The counter and interrupt enable registers are unknown at reset. */
+	armv8pmu_disable_counter(U32_MAX);
+	armv8pmu_disable_intens(U32_MAX);
+
+	/* Clear the counters we flip at guest entry/exit */
+	kvm_clr_pmu_events(U32_MAX);
+
+	/*
+	 * Initialize & Reset PMNC. Request overflow interrupt for
+	 * 64 bit cycle counter but cheat in armv8pmu_write_counter().
+	 */
+	pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC;
+
+	/* Enable long event counter support where available */
+	if (armv8pmu_has_long_event(cpu_pmu))
+		pmcr |= ARMV8_PMU_PMCR_LP;
+
+	armv8pmu_pmcr_write(pmcr);
+}
+
+static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
+				      struct perf_event *event)
+{
+	if (event->attr.type == PERF_TYPE_HARDWARE &&
+	    event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) {
+
+		if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
+			     armpmu->pmceid_bitmap))
+			return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED;
+
+		if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED,
+			     armpmu->pmceid_bitmap))
+			return ARMV8_PMUV3_PERFCTR_BR_RETIRED;
+
+		return HW_OP_UNSUPPORTED;
+	}
+
+	return armpmu_map_event(event, &armv8_pmuv3_perf_map,
+				&armv8_pmuv3_perf_cache_map,
+				ARMV8_PMU_EVTYPE_EVENT);
+}
+
+static int __armv8_pmuv3_map_event(struct perf_event *event,
+				   const unsigned (*extra_event_map)
+						  [PERF_COUNT_HW_MAX],
+				   const unsigned (*extra_cache_map)
+						  [PERF_COUNT_HW_CACHE_MAX]
+						  [PERF_COUNT_HW_CACHE_OP_MAX]
+						  [PERF_COUNT_HW_CACHE_RESULT_MAX])
+{
+	int hw_event_id;
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+
+	hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event);
+
+	/*
+	 * CHAIN events only work when paired with an adjacent counter, and it
+	 * never makes sense for a user to open one in isolation, as they'll be
+	 * rotated arbitrarily.
+	 */
+	if (hw_event_id == ARMV8_PMUV3_PERFCTR_CHAIN)
+		return -EINVAL;
+
+	if (armv8pmu_event_is_64bit(event))
+		event->hw.flags |= ARMPMU_EVT_64BIT;
+
+	/*
+	 * User events must be allocated into a single counter, and so
+	 * must not be chained.
+	 *
+	 * Most 64-bit events require long counter support, but 64-bit
+	 * CPU_CYCLES events can be placed into the dedicated cycle
+	 * counter when this is free.
+	 */
+	if (armv8pmu_event_want_user_access(event)) {
+		if (!(event->attach_state & PERF_ATTACH_TASK))
+			return -EINVAL;
+		if (armv8pmu_event_is_64bit(event) &&
+		    (hw_event_id != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
+		    !armv8pmu_has_long_event(armpmu))
+			return -EOPNOTSUPP;
+
+		event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+	}
+
+	/* Only expose micro/arch events supported by this PMU */
+	if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
+	    && test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
+		return hw_event_id;
+	}
+
+	return armpmu_map_event(event, extra_event_map, extra_cache_map,
+				ARMV8_PMU_EVTYPE_EVENT);
+}
+
+static int armv8_pmuv3_map_event(struct perf_event *event)
+{
+	return __armv8_pmuv3_map_event(event, NULL, NULL);
+}
+
+static int armv8_a53_map_event(struct perf_event *event)
+{
+	return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map);
+}
+
+static int armv8_a57_map_event(struct perf_event *event)
+{
+	return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map);
+}
+
+static int armv8_a73_map_event(struct perf_event *event)
+{
+	return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map);
+}
+
+static int armv8_thunder_map_event(struct perf_event *event)
+{
+	return __armv8_pmuv3_map_event(event, NULL,
+				       &armv8_thunder_perf_cache_map);
+}
+
+static int armv8_vulcan_map_event(struct perf_event *event)
+{
+	return __armv8_pmuv3_map_event(event, NULL,
+				       &armv8_vulcan_perf_cache_map);
+}
+
+struct armv8pmu_probe_info {
+	struct arm_pmu *pmu;
+	bool present;
+};
+
+static void __armv8pmu_probe_pmu(void *info)
+{
+	struct armv8pmu_probe_info *probe = info;
+	struct arm_pmu *cpu_pmu = probe->pmu;
+	u64 pmceid_raw[2];
+	u32 pmceid[2];
+	int pmuver;
+
+	pmuver = read_pmuver();
+	if (!pmuv3_implemented(pmuver))
+		return;
+
+	cpu_pmu->pmuver = pmuver;
+	probe->present = true;
+
+	/* Read the nb of CNTx counters supported from PMNC */
+	cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
+		& ARMV8_PMU_PMCR_N_MASK;
+
+	/* Add the CPU cycles counter */
+	cpu_pmu->num_events += 1;
+
+	pmceid[0] = pmceid_raw[0] = read_pmceid0();
+	pmceid[1] = pmceid_raw[1] = read_pmceid1();
+
+	bitmap_from_arr32(cpu_pmu->pmceid_bitmap,
+			     pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+
+	pmceid[0] = pmceid_raw[0] >> 32;
+	pmceid[1] = pmceid_raw[1] >> 32;
+
+	bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap,
+			     pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+
+	/* store PMMIR register for sysfs */
+	if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31)))
+		cpu_pmu->reg_pmmir = read_pmmir();
+	else
+		cpu_pmu->reg_pmmir = 0;
+}
+
+static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
+{
+	struct armv8pmu_probe_info probe = {
+		.pmu = cpu_pmu,
+		.present = false,
+	};
+	int ret;
+
+	ret = smp_call_function_any(&cpu_pmu->supported_cpus,
+				    __armv8pmu_probe_pmu,
+				    &probe, 1);
+	if (ret)
+		return ret;
+
+	return probe.present ? 0 : -ENODEV;
+}
+
+static void armv8pmu_disable_user_access_ipi(void *unused)
+{
+	armv8pmu_disable_user_access();
+}
+
+static int armv8pmu_proc_user_access_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (ret || !write || sysctl_perf_user_access)
+		return ret;
+
+	on_each_cpu(armv8pmu_disable_user_access_ipi, NULL, 1);
+	return 0;
+}
+
+static struct ctl_table armv8_pmu_sysctl_table[] = {
+	{
+		.procname       = "perf_user_access",
+		.data		= &sysctl_perf_user_access,
+		.maxlen		= sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler	= armv8pmu_proc_user_access_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{ }
+};
+
+static void armv8_pmu_register_sysctl_table(void)
+{
+	static u32 tbl_registered = 0;
+
+	if (!cmpxchg_relaxed(&tbl_registered, 0, 1))
+		register_sysctl("kernel", armv8_pmu_sysctl_table);
+}
+
+static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
+			  int (*map_event)(struct perf_event *event),
+			  const struct attribute_group *events,
+			  const struct attribute_group *format,
+			  const struct attribute_group *caps)
+{
+	int ret = armv8pmu_probe_pmu(cpu_pmu);
+	if (ret)
+		return ret;
+
+	cpu_pmu->handle_irq		= armv8pmu_handle_irq;
+	cpu_pmu->enable			= armv8pmu_enable_event;
+	cpu_pmu->disable		= armv8pmu_disable_event;
+	cpu_pmu->read_counter		= armv8pmu_read_counter;
+	cpu_pmu->write_counter		= armv8pmu_write_counter;
+	cpu_pmu->get_event_idx		= armv8pmu_get_event_idx;
+	cpu_pmu->clear_event_idx	= armv8pmu_clear_event_idx;
+	cpu_pmu->start			= armv8pmu_start;
+	cpu_pmu->stop			= armv8pmu_stop;
+	cpu_pmu->reset			= armv8pmu_reset;
+	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
+
+	cpu_pmu->pmu.event_idx		= armv8pmu_user_event_idx;
+
+	cpu_pmu->name			= name;
+	cpu_pmu->map_event		= map_event;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ?
+			events : &armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ?
+			format : &armv8_pmuv3_format_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ?
+			caps : &armv8_pmuv3_caps_attr_group;
+
+	armv8_pmu_register_sysctl_table();
+	return 0;
+}
+
+static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name,
+				   int (*map_event)(struct perf_event *event))
+{
+	return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL);
+}
+
+#define PMUV3_INIT_SIMPLE(name)						\
+static int name##_pmu_init(struct arm_pmu *cpu_pmu)			\
+{									\
+	return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\
+}
+
+PMUV3_INIT_SIMPLE(armv8_pmuv3)
+
+PMUV3_INIT_SIMPLE(armv8_cortex_a34)
+PMUV3_INIT_SIMPLE(armv8_cortex_a55)
+PMUV3_INIT_SIMPLE(armv8_cortex_a65)
+PMUV3_INIT_SIMPLE(armv8_cortex_a75)
+PMUV3_INIT_SIMPLE(armv8_cortex_a76)
+PMUV3_INIT_SIMPLE(armv8_cortex_a77)
+PMUV3_INIT_SIMPLE(armv8_cortex_a78)
+PMUV3_INIT_SIMPLE(armv9_cortex_a510)
+PMUV3_INIT_SIMPLE(armv9_cortex_a520)
+PMUV3_INIT_SIMPLE(armv9_cortex_a710)
+PMUV3_INIT_SIMPLE(armv9_cortex_a715)
+PMUV3_INIT_SIMPLE(armv9_cortex_a720)
+PMUV3_INIT_SIMPLE(armv8_cortex_x1)
+PMUV3_INIT_SIMPLE(armv9_cortex_x2)
+PMUV3_INIT_SIMPLE(armv9_cortex_x3)
+PMUV3_INIT_SIMPLE(armv9_cortex_x4)
+PMUV3_INIT_SIMPLE(armv8_neoverse_e1)
+PMUV3_INIT_SIMPLE(armv8_neoverse_n1)
+PMUV3_INIT_SIMPLE(armv9_neoverse_n2)
+PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
+
+PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
+PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
+
+static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35",
+				       armv8_a53_map_event);
+}
+
+static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53",
+				       armv8_a53_map_event);
+}
+
+static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57",
+				       armv8_a57_map_event);
+}
+
+static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72",
+				       armv8_a57_map_event);
+}
+
+static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73",
+				       armv8_a73_map_event);
+}
+
+static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder",
+				       armv8_thunder_map_event);
+}
+
+static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan",
+				       armv8_vulcan_map_event);
+}
+
+static const struct of_device_id armv8_pmu_of_device_ids[] = {
+	{.compatible = "arm,armv8-pmuv3",	.data = armv8_pmuv3_pmu_init},
+	{.compatible = "arm,cortex-a34-pmu",	.data = armv8_cortex_a34_pmu_init},
+	{.compatible = "arm,cortex-a35-pmu",	.data = armv8_a35_pmu_init},
+	{.compatible = "arm,cortex-a53-pmu",	.data = armv8_a53_pmu_init},
+	{.compatible = "arm,cortex-a55-pmu",	.data = armv8_cortex_a55_pmu_init},
+	{.compatible = "arm,cortex-a57-pmu",	.data = armv8_a57_pmu_init},
+	{.compatible = "arm,cortex-a65-pmu",	.data = armv8_cortex_a65_pmu_init},
+	{.compatible = "arm,cortex-a72-pmu",	.data = armv8_a72_pmu_init},
+	{.compatible = "arm,cortex-a73-pmu",	.data = armv8_a73_pmu_init},
+	{.compatible = "arm,cortex-a75-pmu",	.data = armv8_cortex_a75_pmu_init},
+	{.compatible = "arm,cortex-a76-pmu",	.data = armv8_cortex_a76_pmu_init},
+	{.compatible = "arm,cortex-a77-pmu",	.data = armv8_cortex_a77_pmu_init},
+	{.compatible = "arm,cortex-a78-pmu",	.data = armv8_cortex_a78_pmu_init},
+	{.compatible = "arm,cortex-a510-pmu",	.data = armv9_cortex_a510_pmu_init},
+	{.compatible = "arm,cortex-a520-pmu",	.data = armv9_cortex_a520_pmu_init},
+	{.compatible = "arm,cortex-a710-pmu",	.data = armv9_cortex_a710_pmu_init},
+	{.compatible = "arm,cortex-a715-pmu",	.data = armv9_cortex_a715_pmu_init},
+	{.compatible = "arm,cortex-a720-pmu",	.data = armv9_cortex_a720_pmu_init},
+	{.compatible = "arm,cortex-x1-pmu",	.data = armv8_cortex_x1_pmu_init},
+	{.compatible = "arm,cortex-x2-pmu",	.data = armv9_cortex_x2_pmu_init},
+	{.compatible = "arm,cortex-x3-pmu",	.data = armv9_cortex_x3_pmu_init},
+	{.compatible = "arm,cortex-x4-pmu",	.data = armv9_cortex_x4_pmu_init},
+	{.compatible = "arm,neoverse-e1-pmu",	.data = armv8_neoverse_e1_pmu_init},
+	{.compatible = "arm,neoverse-n1-pmu",	.data = armv8_neoverse_n1_pmu_init},
+	{.compatible = "arm,neoverse-n2-pmu",	.data = armv9_neoverse_n2_pmu_init},
+	{.compatible = "arm,neoverse-v1-pmu",	.data = armv8_neoverse_v1_pmu_init},
+	{.compatible = "cavium,thunder-pmu",	.data = armv8_thunder_pmu_init},
+	{.compatible = "brcm,vulcan-pmu",	.data = armv8_vulcan_pmu_init},
+	{.compatible = "nvidia,carmel-pmu",	.data = armv8_nvidia_carmel_pmu_init},
+	{.compatible = "nvidia,denver-pmu",	.data = armv8_nvidia_denver_pmu_init},
+	{},
+};
+
+static int armv8_pmu_device_probe(struct platform_device *pdev)
+{
+	return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver armv8_pmu_driver = {
+	.driver		= {
+		.name	= ARMV8_PMU_PDEV_NAME,
+		.of_match_table = armv8_pmu_of_device_ids,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= armv8_pmu_device_probe,
+};
+
+static int __init armv8_pmu_driver_init(void)
+{
+	int ret;
+
+	if (acpi_disabled)
+		ret = platform_driver_register(&armv8_pmu_driver);
+	else
+		ret = arm_pmu_acpi_probe(armv8_pmuv3_pmu_init);
+
+	if (!ret)
+		lockup_detector_retry_init();
+
+	return ret;
+}
+device_initcall(armv8_pmu_driver_init)
+
+void arch_perf_update_userpage(struct perf_event *event,
+			       struct perf_event_mmap_page *userpg, u64 now)
+{
+	struct clock_read_data *rd;
+	unsigned int seq;
+	u64 ns;
+
+	userpg->cap_user_time = 0;
+	userpg->cap_user_time_zero = 0;
+	userpg->cap_user_time_short = 0;
+	userpg->cap_user_rdpmc = armv8pmu_event_has_user_read(event);
+
+	if (userpg->cap_user_rdpmc) {
+		if (event->hw.flags & ARMPMU_EVT_64BIT)
+			userpg->pmc_width = 64;
+		else
+			userpg->pmc_width = 32;
+	}
+
+	do {
+		rd = sched_clock_read_begin(&seq);
+
+		if (rd->read_sched_clock != arch_timer_read_counter)
+			return;
+
+		userpg->time_mult = rd->mult;
+		userpg->time_shift = rd->shift;
+		userpg->time_zero = rd->epoch_ns;
+		userpg->time_cycles = rd->epoch_cyc;
+		userpg->time_mask = rd->sched_clock_mask;
+
+		/*
+		 * Subtract the cycle base, such that software that
+		 * doesn't know about cap_user_time_short still 'works'
+		 * assuming no wraps.
+		 */
+		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+		userpg->time_zero -= ns;
+
+	} while (sched_clock_read_retry(seq));
+
+	userpg->time_offset = userpg->time_zero - now;
+
+	/*
+	 * time_shift is not expected to be greater than 31 due to
+	 * the original published conversion algorithm shifting a
+	 * 32-bit value (now specifies a 64-bit value) - refer
+	 * perf_event_mmap_page documentation in perf_event.h.
+	 */
+	if (userpg->time_shift == 32) {
+		userpg->time_shift = 31;
+		userpg->time_mult >>= 1;
+	}
+
+	/*
+	 * Internal timekeeping for enabled/running/stopped times
+	 * is always computed with the sched_clock.
+	 */
+	userpg->cap_user_time = 1;
+	userpg->cap_user_time_zero = 1;
+	userpg->cap_user_time_short = 1;
+}
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
new file mode 100644
index 0000000000..6303b82566
--- /dev/null
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -0,0 +1,1035 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * This driver adds support for perf events to use the Performance
+ * Monitor Counter Groups (PMCG) associated with an SMMUv3 node
+ * to monitor that node.
+ *
+ * SMMUv3 PMCG devices are named as smmuv3_pmcg_<phys_addr_page> where
+ * <phys_addr_page> is the physical page address of the SMMU PMCG wrapped
+ * to 4K boundary. For example, the PMCG at 0xff88840000 is named
+ * smmuv3_pmcg_ff88840
+ *
+ * Filtering by stream id is done by specifying filtering parameters
+ * with the event. options are:
+ *   filter_enable    - 0 = no filtering, 1 = filtering enabled
+ *   filter_span      - 0 = exact match, 1 = pattern match
+ *   filter_stream_id - pattern to filter against
+ *
+ * To match a partial StreamID where the X most-significant bits must match
+ * but the Y least-significant bits might differ, STREAMID is programmed
+ * with a value that contains:
+ *  STREAMID[Y - 1] == 0.
+ *  STREAMID[Y - 2:0] == 1 (where Y > 1).
+ * The remainder of implemented bits of STREAMID (X bits, from bit Y upwards)
+ * contain a value to match from the corresponding bits of event StreamID.
+ *
+ * Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1,
+ *                    filter_span=1,filter_stream_id=0x42/ -a netperf
+ * Applies filter pattern 0x42 to transaction events, which means events
+ * matching stream ids 0x42 and 0x43 are counted. Further filtering
+ * information is available in the SMMU documentation.
+ *
+ * SMMU events are not attributable to a CPU, so task mode and sampling
+ * are not supported.
+ */
+
+#include <linux/acpi.h>
+#include <linux/acpi_iort.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define SMMU_PMCG_EVCNTR0               0x0
+#define SMMU_PMCG_EVCNTR(n, stride)     (SMMU_PMCG_EVCNTR0 + (n) * (stride))
+#define SMMU_PMCG_EVTYPER0              0x400
+#define SMMU_PMCG_EVTYPER(n)            (SMMU_PMCG_EVTYPER0 + (n) * 4)
+#define SMMU_PMCG_SID_SPAN_SHIFT        29
+#define SMMU_PMCG_SMR0                  0xA00
+#define SMMU_PMCG_SMR(n)                (SMMU_PMCG_SMR0 + (n) * 4)
+#define SMMU_PMCG_CNTENSET0             0xC00
+#define SMMU_PMCG_CNTENCLR0             0xC20
+#define SMMU_PMCG_INTENSET0             0xC40
+#define SMMU_PMCG_INTENCLR0             0xC60
+#define SMMU_PMCG_OVSCLR0               0xC80
+#define SMMU_PMCG_OVSSET0               0xCC0
+#define SMMU_PMCG_CFGR                  0xE00
+#define SMMU_PMCG_CFGR_SID_FILTER_TYPE  BIT(23)
+#define SMMU_PMCG_CFGR_MSI              BIT(21)
+#define SMMU_PMCG_CFGR_RELOC_CTRS       BIT(20)
+#define SMMU_PMCG_CFGR_SIZE             GENMASK(13, 8)
+#define SMMU_PMCG_CFGR_NCTR             GENMASK(5, 0)
+#define SMMU_PMCG_CR                    0xE04
+#define SMMU_PMCG_CR_ENABLE             BIT(0)
+#define SMMU_PMCG_IIDR                  0xE08
+#define SMMU_PMCG_IIDR_PRODUCTID        GENMASK(31, 20)
+#define SMMU_PMCG_IIDR_VARIANT          GENMASK(19, 16)
+#define SMMU_PMCG_IIDR_REVISION         GENMASK(15, 12)
+#define SMMU_PMCG_IIDR_IMPLEMENTER      GENMASK(11, 0)
+#define SMMU_PMCG_CEID0                 0xE20
+#define SMMU_PMCG_CEID1                 0xE28
+#define SMMU_PMCG_IRQ_CTRL              0xE50
+#define SMMU_PMCG_IRQ_CTRL_IRQEN        BIT(0)
+#define SMMU_PMCG_IRQ_CFG0              0xE58
+#define SMMU_PMCG_IRQ_CFG1              0xE60
+#define SMMU_PMCG_IRQ_CFG2              0xE64
+
+/* IMP-DEF ID registers */
+#define SMMU_PMCG_PIDR0                 0xFE0
+#define SMMU_PMCG_PIDR0_PART_0          GENMASK(7, 0)
+#define SMMU_PMCG_PIDR1                 0xFE4
+#define SMMU_PMCG_PIDR1_DES_0           GENMASK(7, 4)
+#define SMMU_PMCG_PIDR1_PART_1          GENMASK(3, 0)
+#define SMMU_PMCG_PIDR2                 0xFE8
+#define SMMU_PMCG_PIDR2_REVISION        GENMASK(7, 4)
+#define SMMU_PMCG_PIDR2_DES_1           GENMASK(2, 0)
+#define SMMU_PMCG_PIDR3                 0xFEC
+#define SMMU_PMCG_PIDR3_REVAND          GENMASK(7, 4)
+#define SMMU_PMCG_PIDR4                 0xFD0
+#define SMMU_PMCG_PIDR4_DES_2           GENMASK(3, 0)
+
+/* MSI config fields */
+#define MSI_CFG0_ADDR_MASK              GENMASK_ULL(51, 2)
+#define MSI_CFG2_MEMATTR_DEVICE_nGnRE   0x1
+
+#define SMMU_PMCG_DEFAULT_FILTER_SPAN   1
+#define SMMU_PMCG_DEFAULT_FILTER_SID    GENMASK(31, 0)
+
+#define SMMU_PMCG_MAX_COUNTERS          64
+#define SMMU_PMCG_ARCH_MAX_EVENTS       128
+
+#define SMMU_PMCG_PA_SHIFT              12
+
+#define SMMU_PMCG_EVCNTR_RDONLY         BIT(0)
+#define SMMU_PMCG_HARDEN_DISABLE        BIT(1)
+
+static int cpuhp_state_num;
+
+struct smmu_pmu {
+	struct hlist_node node;
+	struct perf_event *events[SMMU_PMCG_MAX_COUNTERS];
+	DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS);
+	DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS);
+	unsigned int irq;
+	unsigned int on_cpu;
+	struct pmu pmu;
+	unsigned int num_counters;
+	struct device *dev;
+	void __iomem *reg_base;
+	void __iomem *reloc_base;
+	u64 counter_mask;
+	u32 options;
+	u32 iidr;
+	bool global_filter;
+};
+
+#define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu))
+
+#define SMMU_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end)        \
+	static inline u32 get_##_name(struct perf_event *event)            \
+	{                                                                  \
+		return FIELD_GET(GENMASK_ULL(_end, _start),                \
+				 event->attr._config);                     \
+	}                                                                  \
+
+SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15);
+SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31);
+SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32);
+SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33);
+
+static inline void smmu_pmu_enable(struct pmu *pmu)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+
+	writel(SMMU_PMCG_IRQ_CTRL_IRQEN,
+	       smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
+	writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
+}
+
+static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
+				       struct perf_event *event, int idx);
+
+static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+	unsigned int idx;
+
+	for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+		smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx);
+
+	smmu_pmu_enable(pmu);
+}
+
+static inline void smmu_pmu_disable(struct pmu *pmu)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+
+	writel(0, smmu_pmu->reg_base + SMMU_PMCG_CR);
+	writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
+}
+
+static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+	unsigned int idx;
+
+	/*
+	 * The global disable of PMU sometimes fail to stop the counting.
+	 * Harden this by writing an invalid event type to each used counter
+	 * to forcibly stop counting.
+	 */
+	for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+		writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
+
+	smmu_pmu_disable(pmu);
+}
+
+static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
+					      u32 idx, u64 value)
+{
+	if (smmu_pmu->counter_mask & BIT(32))
+		writeq(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
+	else
+		writel(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));
+}
+
+static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
+{
+	u64 value;
+
+	if (smmu_pmu->counter_mask & BIT(32))
+		value = readq(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
+	else
+		value = readl(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));
+
+	return value;
+}
+
+static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx)
+{
+	writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0);
+}
+
+static inline void smmu_pmu_counter_disable(struct smmu_pmu *smmu_pmu, u32 idx)
+{
+	writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
+}
+
+static inline void smmu_pmu_interrupt_enable(struct smmu_pmu *smmu_pmu, u32 idx)
+{
+	writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENSET0);
+}
+
+static inline void smmu_pmu_interrupt_disable(struct smmu_pmu *smmu_pmu,
+					      u32 idx)
+{
+	writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
+}
+
+static inline void smmu_pmu_set_evtyper(struct smmu_pmu *smmu_pmu, u32 idx,
+					u32 val)
+{
+	writel(val, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
+}
+
+static inline void smmu_pmu_set_smr(struct smmu_pmu *smmu_pmu, u32 idx, u32 val)
+{
+	writel(val, smmu_pmu->reg_base + SMMU_PMCG_SMR(idx));
+}
+
+static void smmu_pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
+	u64 delta, prev, now;
+	u32 idx = hwc->idx;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = smmu_pmu_counter_get_value(smmu_pmu, idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	/* handle overflow. */
+	delta = now - prev;
+	delta &= smmu_pmu->counter_mask;
+
+	local64_add(delta, &event->count);
+}
+
+static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu,
+				struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u64 new;
+
+	if (smmu_pmu->options & SMMU_PMCG_EVCNTR_RDONLY) {
+		/*
+		 * On platforms that require this quirk, if the counter starts
+		 * at < half_counter value and wraps, the current logic of
+		 * handling the overflow may not work. It is expected that,
+		 * those platforms will have full 64 counter bits implemented
+		 * so that such a possibility is remote(eg: HiSilicon HIP08).
+		 */
+		new = smmu_pmu_counter_get_value(smmu_pmu, idx);
+	} else {
+		/*
+		 * We limit the max period to half the max counter value
+		 * of the counter size, so that even in the case of extreme
+		 * interrupt latency the counter will (hopefully) not wrap
+		 * past its initial value.
+		 */
+		new = smmu_pmu->counter_mask >> 1;
+		smmu_pmu_counter_set_value(smmu_pmu, idx, new);
+	}
+
+	local64_set(&hwc->prev_count, new);
+}
+
+static void smmu_pmu_set_event_filter(struct perf_event *event,
+				      int idx, u32 span, u32 sid)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
+	u32 evtyper;
+
+	evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT;
+	smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper);
+	smmu_pmu_set_smr(smmu_pmu, idx, sid);
+}
+
+static bool smmu_pmu_check_global_filter(struct perf_event *curr,
+					 struct perf_event *new)
+{
+	if (get_filter_enable(new) != get_filter_enable(curr))
+		return false;
+
+	if (!get_filter_enable(new))
+		return true;
+
+	return get_filter_span(new) == get_filter_span(curr) &&
+	       get_filter_stream_id(new) == get_filter_stream_id(curr);
+}
+
+static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
+				       struct perf_event *event, int idx)
+{
+	u32 span, sid;
+	unsigned int cur_idx, num_ctrs = smmu_pmu->num_counters;
+	bool filter_en = !!get_filter_enable(event);
+
+	span = filter_en ? get_filter_span(event) :
+			   SMMU_PMCG_DEFAULT_FILTER_SPAN;
+	sid = filter_en ? get_filter_stream_id(event) :
+			   SMMU_PMCG_DEFAULT_FILTER_SID;
+
+	cur_idx = find_first_bit(smmu_pmu->used_counters, num_ctrs);
+	/*
+	 * Per-counter filtering, or scheduling the first globally-filtered
+	 * event into an empty PMU so idx == 0 and it works out equivalent.
+	 */
+	if (!smmu_pmu->global_filter || cur_idx == num_ctrs) {
+		smmu_pmu_set_event_filter(event, idx, span, sid);
+		return 0;
+	}
+
+	/* Otherwise, must match whatever's currently scheduled */
+	if (smmu_pmu_check_global_filter(smmu_pmu->events[cur_idx], event)) {
+		smmu_pmu_set_evtyper(smmu_pmu, idx, get_event(event));
+		return 0;
+	}
+
+	return -EAGAIN;
+}
+
+static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu,
+				  struct perf_event *event)
+{
+	int idx, err;
+	unsigned int num_ctrs = smmu_pmu->num_counters;
+
+	idx = find_first_zero_bit(smmu_pmu->used_counters, num_ctrs);
+	if (idx == num_ctrs)
+		/* The counters are all in use. */
+		return -EAGAIN;
+
+	err = smmu_pmu_apply_event_filter(smmu_pmu, event, idx);
+	if (err)
+		return err;
+
+	set_bit(idx, smmu_pmu->used_counters);
+
+	return idx;
+}
+
+static bool smmu_pmu_events_compatible(struct perf_event *curr,
+				       struct perf_event *new)
+{
+	if (new->pmu != curr->pmu)
+		return false;
+
+	if (to_smmu_pmu(new->pmu)->global_filter &&
+	    !smmu_pmu_check_global_filter(curr, new))
+		return false;
+
+	return true;
+}
+
+/*
+ * Implementation of abstract pmu functionality required by
+ * the core perf events code.
+ */
+
+static int smmu_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
+	struct device *dev = smmu_pmu->dev;
+	struct perf_event *sibling;
+	int group_num_events = 1;
+	u16 event_id;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (hwc->sample_period) {
+		dev_dbg(dev, "Sampling not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0) {
+		dev_dbg(dev, "Per-task mode not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Verify specified event is supported on this PMU */
+	event_id = get_event(event);
+	if (event_id < SMMU_PMCG_ARCH_MAX_EVENTS &&
+	    (!test_bit(event_id, smmu_pmu->supported_events))) {
+		dev_dbg(dev, "Invalid event %d for this PMU\n", event_id);
+		return -EINVAL;
+	}
+
+	/* Don't allow groups with mixed PMUs, except for s/w events */
+	if (!is_software_event(event->group_leader)) {
+		if (!smmu_pmu_events_compatible(event->group_leader, event))
+			return -EINVAL;
+
+		if (++group_num_events > smmu_pmu->num_counters)
+			return -EINVAL;
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (is_software_event(sibling))
+			continue;
+
+		if (!smmu_pmu_events_compatible(sibling, event))
+			return -EINVAL;
+
+		if (++group_num_events > smmu_pmu->num_counters)
+			return -EINVAL;
+	}
+
+	hwc->idx = -1;
+
+	/*
+	 * Ensure all events are on the same cpu so all events are in the
+	 * same cpu context, to avoid races on pmu_enable etc.
+	 */
+	event->cpu = smmu_pmu->on_cpu;
+
+	return 0;
+}
+
+static void smmu_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	hwc->state = 0;
+
+	smmu_pmu_set_period(smmu_pmu, hwc);
+
+	smmu_pmu_counter_enable(smmu_pmu, idx);
+}
+
+static void smmu_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	smmu_pmu_counter_disable(smmu_pmu, idx);
+	/* As the counter gets updated on _start, ignore PERF_EF_UPDATE */
+	smmu_pmu_event_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int smmu_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
+
+	idx = smmu_pmu_get_event_idx(smmu_pmu, event);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	smmu_pmu->events[idx] = event;
+	local64_set(&hwc->prev_count, 0);
+
+	smmu_pmu_interrupt_enable(smmu_pmu, idx);
+
+	if (flags & PERF_EF_START)
+		smmu_pmu_event_start(event, flags);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void smmu_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
+	int idx = hwc->idx;
+
+	smmu_pmu_event_stop(event, flags | PERF_EF_UPDATE);
+	smmu_pmu_interrupt_disable(smmu_pmu, idx);
+	smmu_pmu->events[idx] = NULL;
+	clear_bit(idx, smmu_pmu->used_counters);
+
+	perf_event_update_userpage(event);
+}
+
+static void smmu_pmu_event_read(struct perf_event *event)
+{
+	smmu_pmu_event_update(event);
+}
+
+/* cpumask */
+
+static ssize_t smmu_pmu_cpumask_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu));
+}
+
+static struct device_attribute smmu_pmu_cpumask_attr =
+		__ATTR(cpumask, 0444, smmu_pmu_cpumask_show, NULL);
+
+static struct attribute *smmu_pmu_cpumask_attrs[] = {
+	&smmu_pmu_cpumask_attr.attr,
+	NULL
+};
+
+static const struct attribute_group smmu_pmu_cpumask_group = {
+	.attrs = smmu_pmu_cpumask_attrs,
+};
+
+/* Events */
+
+static ssize_t smmu_pmu_event_show(struct device *dev,
+				   struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define SMMU_EVENT_ATTR(name, config)			\
+	PMU_EVENT_ATTR_ID(name, smmu_pmu_event_show, config)
+
+static struct attribute *smmu_pmu_events[] = {
+	SMMU_EVENT_ATTR(cycles, 0),
+	SMMU_EVENT_ATTR(transaction, 1),
+	SMMU_EVENT_ATTR(tlb_miss, 2),
+	SMMU_EVENT_ATTR(config_cache_miss, 3),
+	SMMU_EVENT_ATTR(trans_table_walk_access, 4),
+	SMMU_EVENT_ATTR(config_struct_access, 5),
+	SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6),
+	SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7),
+	NULL
+};
+
+static umode_t smmu_pmu_event_is_visible(struct kobject *kobj,
+					 struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+
+	if (test_bit(pmu_attr->id, smmu_pmu->supported_events))
+		return attr->mode;
+
+	return 0;
+}
+
+static const struct attribute_group smmu_pmu_events_group = {
+	.name = "events",
+	.attrs = smmu_pmu_events,
+	.is_visible = smmu_pmu_event_is_visible,
+};
+
+static ssize_t smmu_pmu_identifier_attr_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(page, "0x%08x\n", smmu_pmu->iidr);
+}
+
+static umode_t smmu_pmu_identifier_attr_visible(struct kobject *kobj,
+						struct attribute *attr,
+						int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
+
+	if (!smmu_pmu->iidr)
+		return 0;
+	return attr->mode;
+}
+
+static struct device_attribute smmu_pmu_identifier_attr =
+	__ATTR(identifier, 0444, smmu_pmu_identifier_attr_show, NULL);
+
+static struct attribute *smmu_pmu_identifier_attrs[] = {
+	&smmu_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group smmu_pmu_identifier_group = {
+	.attrs = smmu_pmu_identifier_attrs,
+	.is_visible = smmu_pmu_identifier_attr_visible,
+};
+
+/* Formats */
+PMU_FORMAT_ATTR(event,		   "config:0-15");
+PMU_FORMAT_ATTR(filter_stream_id,  "config1:0-31");
+PMU_FORMAT_ATTR(filter_span,	   "config1:32");
+PMU_FORMAT_ATTR(filter_enable,	   "config1:33");
+
+static struct attribute *smmu_pmu_formats[] = {
+	&format_attr_event.attr,
+	&format_attr_filter_stream_id.attr,
+	&format_attr_filter_span.attr,
+	&format_attr_filter_enable.attr,
+	NULL
+};
+
+static const struct attribute_group smmu_pmu_format_group = {
+	.name = "format",
+	.attrs = smmu_pmu_formats,
+};
+
+static const struct attribute_group *smmu_pmu_attr_grps[] = {
+	&smmu_pmu_cpumask_group,
+	&smmu_pmu_events_group,
+	&smmu_pmu_format_group,
+	&smmu_pmu_identifier_group,
+	NULL
+};
+
+/*
+ * Generic device handlers
+ */
+
+static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct smmu_pmu *smmu_pmu;
+	unsigned int target;
+
+	smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node);
+	if (cpu != smmu_pmu->on_cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
+	smmu_pmu->on_cpu = target;
+	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
+
+	return 0;
+}
+
+static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data)
+{
+	struct smmu_pmu *smmu_pmu = data;
+	DECLARE_BITMAP(ovs, BITS_PER_TYPE(u64));
+	u64 ovsr;
+	unsigned int idx;
+
+	ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
+	if (!ovsr)
+		return IRQ_NONE;
+
+	writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
+
+	bitmap_from_u64(ovs, ovsr);
+	for_each_set_bit(idx, ovs, smmu_pmu->num_counters) {
+		struct perf_event *event = smmu_pmu->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (WARN_ON_ONCE(!event))
+			continue;
+
+		smmu_pmu_event_update(event);
+		hwc = &event->hw;
+
+		smmu_pmu_set_period(smmu_pmu, hwc);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void smmu_pmu_free_msis(void *data)
+{
+	struct device *dev = data;
+
+	platform_msi_domain_free_irqs(dev);
+}
+
+static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+	phys_addr_t doorbell;
+	struct device *dev = msi_desc_to_dev(desc);
+	struct smmu_pmu *pmu = dev_get_drvdata(dev);
+
+	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
+	doorbell &= MSI_CFG0_ADDR_MASK;
+
+	writeq_relaxed(doorbell, pmu->reg_base + SMMU_PMCG_IRQ_CFG0);
+	writel_relaxed(msg->data, pmu->reg_base + SMMU_PMCG_IRQ_CFG1);
+	writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE,
+		       pmu->reg_base + SMMU_PMCG_IRQ_CFG2);
+}
+
+static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
+{
+	struct device *dev = pmu->dev;
+	int ret;
+
+	/* Clear MSI address reg */
+	writeq_relaxed(0, pmu->reg_base + SMMU_PMCG_IRQ_CFG0);
+
+	/* MSI supported or not */
+	if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
+		return;
+
+	ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
+	if (ret) {
+		dev_warn(dev, "failed to allocate MSIs\n");
+		return;
+	}
+
+	pmu->irq = msi_get_virq(dev, 0);
+
+	/* Add callback to free MSIs on teardown */
+	devm_add_action(dev, smmu_pmu_free_msis, dev);
+}
+
+static int smmu_pmu_setup_irq(struct smmu_pmu *pmu)
+{
+	unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD;
+	int irq, ret = -ENXIO;
+
+	smmu_pmu_setup_msi(pmu);
+
+	irq = pmu->irq;
+	if (irq)
+		ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq,
+				       flags, "smmuv3-pmu", pmu);
+	return ret;
+}
+
+static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu)
+{
+	u64 counter_present_mask = GENMASK_ULL(smmu_pmu->num_counters - 1, 0);
+
+	smmu_pmu_disable(&smmu_pmu->pmu);
+
+	/* Disable counter and interrupt */
+	writeq_relaxed(counter_present_mask,
+		       smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
+	writeq_relaxed(counter_present_mask,
+		       smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
+	writeq_relaxed(counter_present_mask,
+		       smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
+}
+
+static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
+{
+	u32 model;
+
+	model = *(u32 *)dev_get_platdata(smmu_pmu->dev);
+
+	switch (model) {
+	case IORT_SMMU_V3_PMCG_HISI_HIP08:
+		/* HiSilicon Erratum 162001800 */
+		smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE;
+		break;
+	case IORT_SMMU_V3_PMCG_HISI_HIP09:
+		smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE;
+		break;
+	}
+
+	dev_notice(smmu_pmu->dev, "option mask 0x%x\n", smmu_pmu->options);
+}
+
+static bool smmu_pmu_coresight_id_regs(struct smmu_pmu *smmu_pmu)
+{
+	return of_device_is_compatible(smmu_pmu->dev->of_node,
+				       "arm,mmu-600-pmcg");
+}
+
+static void smmu_pmu_get_iidr(struct smmu_pmu *smmu_pmu)
+{
+	u32 iidr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_IIDR);
+
+	if (!iidr && smmu_pmu_coresight_id_regs(smmu_pmu)) {
+		u32 pidr0 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR0);
+		u32 pidr1 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR1);
+		u32 pidr2 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR2);
+		u32 pidr3 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR3);
+		u32 pidr4 = readl(smmu_pmu->reg_base + SMMU_PMCG_PIDR4);
+
+		u32 productid = FIELD_GET(SMMU_PMCG_PIDR0_PART_0, pidr0) |
+				(FIELD_GET(SMMU_PMCG_PIDR1_PART_1, pidr1) << 8);
+		u32 variant = FIELD_GET(SMMU_PMCG_PIDR2_REVISION, pidr2);
+		u32 revision = FIELD_GET(SMMU_PMCG_PIDR3_REVAND, pidr3);
+		u32 implementer =
+			FIELD_GET(SMMU_PMCG_PIDR1_DES_0, pidr1) |
+			(FIELD_GET(SMMU_PMCG_PIDR2_DES_1, pidr2) << 4) |
+			(FIELD_GET(SMMU_PMCG_PIDR4_DES_2, pidr4) << 8);
+
+		iidr = FIELD_PREP(SMMU_PMCG_IIDR_PRODUCTID, productid) |
+		       FIELD_PREP(SMMU_PMCG_IIDR_VARIANT, variant) |
+		       FIELD_PREP(SMMU_PMCG_IIDR_REVISION, revision) |
+		       FIELD_PREP(SMMU_PMCG_IIDR_IMPLEMENTER, implementer);
+	}
+
+	smmu_pmu->iidr = iidr;
+}
+
+static int smmu_pmu_probe(struct platform_device *pdev)
+{
+	struct smmu_pmu *smmu_pmu;
+	struct resource *res_0;
+	u32 cfgr, reg_size;
+	u64 ceid_64[2];
+	int irq, err;
+	char *name;
+	struct device *dev = &pdev->dev;
+
+	smmu_pmu = devm_kzalloc(dev, sizeof(*smmu_pmu), GFP_KERNEL);
+	if (!smmu_pmu)
+		return -ENOMEM;
+
+	smmu_pmu->dev = dev;
+	platform_set_drvdata(pdev, smmu_pmu);
+
+	smmu_pmu->pmu = (struct pmu) {
+		.module		= THIS_MODULE,
+		.task_ctx_nr    = perf_invalid_context,
+		.pmu_enable	= smmu_pmu_enable,
+		.pmu_disable	= smmu_pmu_disable,
+		.event_init	= smmu_pmu_event_init,
+		.add		= smmu_pmu_event_add,
+		.del		= smmu_pmu_event_del,
+		.start		= smmu_pmu_event_start,
+		.stop		= smmu_pmu_event_stop,
+		.read		= smmu_pmu_event_read,
+		.attr_groups	= smmu_pmu_attr_grps,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	smmu_pmu->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res_0);
+	if (IS_ERR(smmu_pmu->reg_base))
+		return PTR_ERR(smmu_pmu->reg_base);
+
+	cfgr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CFGR);
+
+	/* Determine if page 1 is present */
+	if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) {
+		smmu_pmu->reloc_base = devm_platform_ioremap_resource(pdev, 1);
+		if (IS_ERR(smmu_pmu->reloc_base))
+			return PTR_ERR(smmu_pmu->reloc_base);
+	} else {
+		smmu_pmu->reloc_base = smmu_pmu->reg_base;
+	}
+
+	irq = platform_get_irq_optional(pdev, 0);
+	if (irq > 0)
+		smmu_pmu->irq = irq;
+
+	ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
+	ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
+	bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64,
+			  SMMU_PMCG_ARCH_MAX_EVENTS);
+
+	smmu_pmu->num_counters = FIELD_GET(SMMU_PMCG_CFGR_NCTR, cfgr) + 1;
+
+	smmu_pmu->global_filter = !!(cfgr & SMMU_PMCG_CFGR_SID_FILTER_TYPE);
+
+	reg_size = FIELD_GET(SMMU_PMCG_CFGR_SIZE, cfgr);
+	smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0);
+
+	smmu_pmu_reset(smmu_pmu);
+
+	err = smmu_pmu_setup_irq(smmu_pmu);
+	if (err) {
+		dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
+		return err;
+	}
+
+	smmu_pmu_get_iidr(smmu_pmu);
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
+			      (res_0->start) >> SMMU_PMCG_PA_SHIFT);
+	if (!name) {
+		dev_err(dev, "Create name failed, PMU @%pa\n", &res_0->start);
+		return -EINVAL;
+	}
+
+	if (!dev->of_node)
+		smmu_pmu_get_acpi_options(smmu_pmu);
+
+	/*
+	 * For platforms suffer this quirk, the PMU disable sometimes fails to
+	 * stop the counters. This will leads to inaccurate or error counting.
+	 * Forcibly disable the counters with these quirk handler.
+	 */
+	if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) {
+		smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09;
+		smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09;
+	}
+
+	/* Pick one CPU to be the preferred one to use */
+	smmu_pmu->on_cpu = raw_smp_processor_id();
+	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
+
+	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
+					       &smmu_pmu->node);
+	if (err) {
+		dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
+			err, &res_0->start);
+		return err;
+	}
+
+	err = perf_pmu_register(&smmu_pmu->pmu, name, -1);
+	if (err) {
+		dev_err(dev, "Error %d registering PMU @%pa\n",
+			err, &res_0->start);
+		goto out_unregister;
+	}
+
+	dev_info(dev, "Registered PMU @ %pa using %d counters with %s filter settings\n",
+		 &res_0->start, smmu_pmu->num_counters,
+		 smmu_pmu->global_filter ? "Global(Counter0)" :
+		 "Individual");
+
+	return 0;
+
+out_unregister:
+	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
+	return err;
+}
+
+static int smmu_pmu_remove(struct platform_device *pdev)
+{
+	struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&smmu_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
+
+	return 0;
+}
+
+static void smmu_pmu_shutdown(struct platform_device *pdev)
+{
+	struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
+
+	smmu_pmu_disable(&smmu_pmu->pmu);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id smmu_pmu_of_match[] = {
+	{ .compatible = "arm,smmu-v3-pmcg" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, smmu_pmu_of_match);
+#endif
+
+static struct platform_driver smmu_pmu_driver = {
+	.driver = {
+		.name = "arm-smmu-v3-pmcg",
+		.of_match_table = of_match_ptr(smmu_pmu_of_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = smmu_pmu_probe,
+	.remove = smmu_pmu_remove,
+	.shutdown = smmu_pmu_shutdown,
+};
+
+static int __init arm_smmu_pmu_init(void)
+{
+	int ret;
+
+	cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+						  "perf/arm/pmcg:online",
+						  NULL,
+						  smmu_pmu_offline_cpu);
+	if (cpuhp_state_num < 0)
+		return cpuhp_state_num;
+
+	ret = platform_driver_register(&smmu_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(cpuhp_state_num);
+
+	return ret;
+}
+module_init(arm_smmu_pmu_init);
+
+static void __exit arm_smmu_pmu_exit(void)
+{
+	platform_driver_unregister(&smmu_pmu_driver);
+	cpuhp_remove_multi_state(cpuhp_state_num);
+}
+
+module_exit(arm_smmu_pmu_exit);
+
+MODULE_ALIAS("platform:arm-smmu-v3-pmcg");
+MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
+MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
+MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
new file mode 100644
index 0000000000..d2b0cbf0e0
--- /dev/null
+++ b/drivers/perf/arm_spe_pmu.c
@@ -0,0 +1,1338 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Perf support for the Statistical Profiling Extension, introduced as
+ * part of ARMv8.2.
+ *
+ * Copyright (C) 2016 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#define PMUNAME					"arm_spe"
+#define DRVNAME					PMUNAME "_pmu"
+#define pr_fmt(fmt)				DRVNAME ": " fmt
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/capability.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/mmu.h>
+#include <asm/sysreg.h>
+
+/*
+ * Cache if the event is allowed to trace Context information.
+ * This allows us to perform the check, i.e, perfmon_capable(),
+ * in the context of the event owner, once, during the event_init().
+ */
+#define SPE_PMU_HW_FLAGS_CX			0x00001
+
+static_assert((PERF_EVENT_FLAG_ARCH & SPE_PMU_HW_FLAGS_CX) == SPE_PMU_HW_FLAGS_CX);
+
+static void set_spe_event_has_cx(struct perf_event *event)
+{
+	if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
+		event->hw.flags |= SPE_PMU_HW_FLAGS_CX;
+}
+
+static bool get_spe_event_has_cx(struct perf_event *event)
+{
+	return !!(event->hw.flags & SPE_PMU_HW_FLAGS_CX);
+}
+
+#define ARM_SPE_BUF_PAD_BYTE			0
+
+struct arm_spe_pmu_buf {
+	int					nr_pages;
+	bool					snapshot;
+	void					*base;
+};
+
+struct arm_spe_pmu {
+	struct pmu				pmu;
+	struct platform_device			*pdev;
+	cpumask_t				supported_cpus;
+	struct hlist_node			hotplug_node;
+
+	int					irq; /* PPI */
+	u16					pmsver;
+	u16					min_period;
+	u16					counter_sz;
+
+#define SPE_PMU_FEAT_FILT_EVT			(1UL << 0)
+#define SPE_PMU_FEAT_FILT_TYP			(1UL << 1)
+#define SPE_PMU_FEAT_FILT_LAT			(1UL << 2)
+#define SPE_PMU_FEAT_ARCH_INST			(1UL << 3)
+#define SPE_PMU_FEAT_LDS			(1UL << 4)
+#define SPE_PMU_FEAT_ERND			(1UL << 5)
+#define SPE_PMU_FEAT_INV_FILT_EVT		(1UL << 6)
+#define SPE_PMU_FEAT_DEV_PROBED			(1UL << 63)
+	u64					features;
+
+	u16					max_record_sz;
+	u16					align;
+	struct perf_output_handle __percpu	*handle;
+};
+
+#define to_spe_pmu(p) (container_of(p, struct arm_spe_pmu, pmu))
+
+/* Convert a free-running index from perf into an SPE buffer offset */
+#define PERF_IDX2OFF(idx, buf)	((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+
+/* Keep track of our dynamic hotplug state */
+static enum cpuhp_state arm_spe_pmu_online;
+
+enum arm_spe_pmu_buf_fault_action {
+	SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
+	SPE_PMU_BUF_FAULT_ACT_FATAL,
+	SPE_PMU_BUF_FAULT_ACT_OK,
+};
+
+/* This sysfs gunk was really good fun to write. */
+enum arm_spe_pmu_capabilities {
+	SPE_PMU_CAP_ARCH_INST = 0,
+	SPE_PMU_CAP_ERND,
+	SPE_PMU_CAP_FEAT_MAX,
+	SPE_PMU_CAP_CNT_SZ = SPE_PMU_CAP_FEAT_MAX,
+	SPE_PMU_CAP_MIN_IVAL,
+};
+
+static int arm_spe_pmu_feat_caps[SPE_PMU_CAP_FEAT_MAX] = {
+	[SPE_PMU_CAP_ARCH_INST]	= SPE_PMU_FEAT_ARCH_INST,
+	[SPE_PMU_CAP_ERND]	= SPE_PMU_FEAT_ERND,
+};
+
+static u32 arm_spe_pmu_cap_get(struct arm_spe_pmu *spe_pmu, int cap)
+{
+	if (cap < SPE_PMU_CAP_FEAT_MAX)
+		return !!(spe_pmu->features & arm_spe_pmu_feat_caps[cap]);
+
+	switch (cap) {
+	case SPE_PMU_CAP_CNT_SZ:
+		return spe_pmu->counter_sz;
+	case SPE_PMU_CAP_MIN_IVAL:
+		return spe_pmu->min_period;
+	default:
+		WARN(1, "unknown cap %d\n", cap);
+	}
+
+	return 0;
+}
+
+static ssize_t arm_spe_pmu_cap_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
+	struct dev_ext_attribute *ea =
+		container_of(attr, struct dev_ext_attribute, attr);
+	int cap = (long)ea->var;
+
+	return sysfs_emit(buf, "%u\n", arm_spe_pmu_cap_get(spe_pmu, cap));
+}
+
+#define SPE_EXT_ATTR_ENTRY(_name, _func, _var)				\
+	&((struct dev_ext_attribute[]) {				\
+		{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_var }	\
+	})[0].attr.attr
+
+#define SPE_CAP_EXT_ATTR_ENTRY(_name, _var)				\
+	SPE_EXT_ATTR_ENTRY(_name, arm_spe_pmu_cap_show, _var)
+
+static struct attribute *arm_spe_pmu_cap_attr[] = {
+	SPE_CAP_EXT_ATTR_ENTRY(arch_inst, SPE_PMU_CAP_ARCH_INST),
+	SPE_CAP_EXT_ATTR_ENTRY(ernd, SPE_PMU_CAP_ERND),
+	SPE_CAP_EXT_ATTR_ENTRY(count_size, SPE_PMU_CAP_CNT_SZ),
+	SPE_CAP_EXT_ATTR_ENTRY(min_interval, SPE_PMU_CAP_MIN_IVAL),
+	NULL,
+};
+
+static const struct attribute_group arm_spe_pmu_cap_group = {
+	.name	= "caps",
+	.attrs	= arm_spe_pmu_cap_attr,
+};
+
+/* User ABI */
+#define ATTR_CFG_FLD_ts_enable_CFG		config	/* PMSCR_EL1.TS */
+#define ATTR_CFG_FLD_ts_enable_LO		0
+#define ATTR_CFG_FLD_ts_enable_HI		0
+#define ATTR_CFG_FLD_pa_enable_CFG		config	/* PMSCR_EL1.PA */
+#define ATTR_CFG_FLD_pa_enable_LO		1
+#define ATTR_CFG_FLD_pa_enable_HI		1
+#define ATTR_CFG_FLD_pct_enable_CFG		config	/* PMSCR_EL1.PCT */
+#define ATTR_CFG_FLD_pct_enable_LO		2
+#define ATTR_CFG_FLD_pct_enable_HI		2
+#define ATTR_CFG_FLD_jitter_CFG			config	/* PMSIRR_EL1.RND */
+#define ATTR_CFG_FLD_jitter_LO			16
+#define ATTR_CFG_FLD_jitter_HI			16
+#define ATTR_CFG_FLD_branch_filter_CFG		config	/* PMSFCR_EL1.B */
+#define ATTR_CFG_FLD_branch_filter_LO		32
+#define ATTR_CFG_FLD_branch_filter_HI		32
+#define ATTR_CFG_FLD_load_filter_CFG		config	/* PMSFCR_EL1.LD */
+#define ATTR_CFG_FLD_load_filter_LO		33
+#define ATTR_CFG_FLD_load_filter_HI		33
+#define ATTR_CFG_FLD_store_filter_CFG		config	/* PMSFCR_EL1.ST */
+#define ATTR_CFG_FLD_store_filter_LO		34
+#define ATTR_CFG_FLD_store_filter_HI		34
+
+#define ATTR_CFG_FLD_event_filter_CFG		config1	/* PMSEVFR_EL1 */
+#define ATTR_CFG_FLD_event_filter_LO		0
+#define ATTR_CFG_FLD_event_filter_HI		63
+
+#define ATTR_CFG_FLD_min_latency_CFG		config2	/* PMSLATFR_EL1.MINLAT */
+#define ATTR_CFG_FLD_min_latency_LO		0
+#define ATTR_CFG_FLD_min_latency_HI		11
+
+#define ATTR_CFG_FLD_inv_event_filter_CFG	config3	/* PMSNEVFR_EL1 */
+#define ATTR_CFG_FLD_inv_event_filter_LO	0
+#define ATTR_CFG_FLD_inv_event_filter_HI	63
+
+/* Why does everything I do descend into this? */
+#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
+	(lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
+
+#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi)				\
+	__GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+
+#define GEN_PMU_FORMAT_ATTR(name)					\
+	PMU_FORMAT_ATTR(name,						\
+	_GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG,			\
+			     ATTR_CFG_FLD_##name##_LO,			\
+			     ATTR_CFG_FLD_##name##_HI))
+
+#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi)				\
+	((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0))
+
+#define ATTR_CFG_GET_FLD(attr, name)					\
+	_ATTR_CFG_GET_FLD(attr,						\
+			  ATTR_CFG_FLD_##name##_CFG,			\
+			  ATTR_CFG_FLD_##name##_LO,			\
+			  ATTR_CFG_FLD_##name##_HI)
+
+GEN_PMU_FORMAT_ATTR(ts_enable);
+GEN_PMU_FORMAT_ATTR(pa_enable);
+GEN_PMU_FORMAT_ATTR(pct_enable);
+GEN_PMU_FORMAT_ATTR(jitter);
+GEN_PMU_FORMAT_ATTR(branch_filter);
+GEN_PMU_FORMAT_ATTR(load_filter);
+GEN_PMU_FORMAT_ATTR(store_filter);
+GEN_PMU_FORMAT_ATTR(event_filter);
+GEN_PMU_FORMAT_ATTR(inv_event_filter);
+GEN_PMU_FORMAT_ATTR(min_latency);
+
+static struct attribute *arm_spe_pmu_formats_attr[] = {
+	&format_attr_ts_enable.attr,
+	&format_attr_pa_enable.attr,
+	&format_attr_pct_enable.attr,
+	&format_attr_jitter.attr,
+	&format_attr_branch_filter.attr,
+	&format_attr_load_filter.attr,
+	&format_attr_store_filter.attr,
+	&format_attr_event_filter.attr,
+	&format_attr_inv_event_filter.attr,
+	&format_attr_min_latency.attr,
+	NULL,
+};
+
+static umode_t arm_spe_pmu_format_attr_is_visible(struct kobject *kobj,
+						  struct attribute *attr,
+						  int unused)
+	{
+	struct device *dev = kobj_to_dev(kobj);
+	struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
+
+	if (attr == &format_attr_inv_event_filter.attr && !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
+		return 0;
+
+	return attr->mode;
+}
+
+static const struct attribute_group arm_spe_pmu_format_group = {
+	.name	= "format",
+	.is_visible = arm_spe_pmu_format_attr_is_visible,
+	.attrs	= arm_spe_pmu_formats_attr,
+};
+
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *arm_spe_pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group arm_spe_pmu_group = {
+	.attrs	= arm_spe_pmu_attrs,
+};
+
+static const struct attribute_group *arm_spe_pmu_attr_groups[] = {
+	&arm_spe_pmu_group,
+	&arm_spe_pmu_cap_group,
+	&arm_spe_pmu_format_group,
+	NULL,
+};
+
+/* Convert between user ABI and register values */
+static u64 arm_spe_event_to_pmscr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	u64 reg = 0;
+
+	reg |= FIELD_PREP(PMSCR_EL1_TS, ATTR_CFG_GET_FLD(attr, ts_enable));
+	reg |= FIELD_PREP(PMSCR_EL1_PA, ATTR_CFG_GET_FLD(attr, pa_enable));
+	reg |= FIELD_PREP(PMSCR_EL1_PCT, ATTR_CFG_GET_FLD(attr, pct_enable));
+
+	if (!attr->exclude_user)
+		reg |= PMSCR_EL1_E0SPE;
+
+	if (!attr->exclude_kernel)
+		reg |= PMSCR_EL1_E1SPE;
+
+	if (get_spe_event_has_cx(event))
+		reg |= PMSCR_EL1_CX;
+
+	return reg;
+}
+
+static void arm_spe_event_sanitise_period(struct perf_event *event)
+{
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+	u64 period = event->hw.sample_period;
+	u64 max_period = PMSIRR_EL1_INTERVAL_MASK;
+
+	if (period < spe_pmu->min_period)
+		period = spe_pmu->min_period;
+	else if (period > max_period)
+		period = max_period;
+	else
+		period &= max_period;
+
+	event->hw.sample_period = period;
+}
+
+static u64 arm_spe_event_to_pmsirr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	u64 reg = 0;
+
+	arm_spe_event_sanitise_period(event);
+
+	reg |= FIELD_PREP(PMSIRR_EL1_RND, ATTR_CFG_GET_FLD(attr, jitter));
+	reg |= event->hw.sample_period;
+
+	return reg;
+}
+
+static u64 arm_spe_event_to_pmsfcr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	u64 reg = 0;
+
+	reg |= FIELD_PREP(PMSFCR_EL1_LD, ATTR_CFG_GET_FLD(attr, load_filter));
+	reg |= FIELD_PREP(PMSFCR_EL1_ST, ATTR_CFG_GET_FLD(attr, store_filter));
+	reg |= FIELD_PREP(PMSFCR_EL1_B, ATTR_CFG_GET_FLD(attr, branch_filter));
+
+	if (reg)
+		reg |= PMSFCR_EL1_FT;
+
+	if (ATTR_CFG_GET_FLD(attr, event_filter))
+		reg |= PMSFCR_EL1_FE;
+
+	if (ATTR_CFG_GET_FLD(attr, inv_event_filter))
+		reg |= PMSFCR_EL1_FnE;
+
+	if (ATTR_CFG_GET_FLD(attr, min_latency))
+		reg |= PMSFCR_EL1_FL;
+
+	return reg;
+}
+
+static u64 arm_spe_event_to_pmsevfr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	return ATTR_CFG_GET_FLD(attr, event_filter);
+}
+
+static u64 arm_spe_event_to_pmsnevfr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	return ATTR_CFG_GET_FLD(attr, inv_event_filter);
+}
+
+static u64 arm_spe_event_to_pmslatfr(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	return FIELD_PREP(PMSLATFR_EL1_MINLAT, ATTR_CFG_GET_FLD(attr, min_latency));
+}
+
+static void arm_spe_pmu_pad_buf(struct perf_output_handle *handle, int len)
+{
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	u64 head = PERF_IDX2OFF(handle->head, buf);
+
+	memset(buf->base + head, ARM_SPE_BUF_PAD_BYTE, len);
+	if (!buf->snapshot)
+		perf_aux_output_skip(handle, len);
+}
+
+static u64 arm_spe_pmu_next_snapshot_off(struct perf_output_handle *handle)
+{
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+	u64 head = PERF_IDX2OFF(handle->head, buf);
+	u64 limit = buf->nr_pages * PAGE_SIZE;
+
+	/*
+	 * The trace format isn't parseable in reverse, so clamp
+	 * the limit to half of the buffer size in snapshot mode
+	 * so that the worst case is half a buffer of records, as
+	 * opposed to a single record.
+	 */
+	if (head < limit >> 1)
+		limit >>= 1;
+
+	/*
+	 * If we're within max_record_sz of the limit, we must
+	 * pad, move the head index and recompute the limit.
+	 */
+	if (limit - head < spe_pmu->max_record_sz) {
+		arm_spe_pmu_pad_buf(handle, limit - head);
+		handle->head = PERF_IDX2OFF(limit, buf);
+		limit = ((buf->nr_pages * PAGE_SIZE) >> 1) + handle->head;
+	}
+
+	return limit;
+}
+
+static u64 __arm_spe_pmu_next_off(struct perf_output_handle *handle)
+{
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	const u64 bufsize = buf->nr_pages * PAGE_SIZE;
+	u64 limit = bufsize;
+	u64 head, tail, wakeup;
+
+	/*
+	 * The head can be misaligned for two reasons:
+	 *
+	 * 1. The hardware left PMBPTR pointing to the first byte after
+	 *    a record when generating a buffer management event.
+	 *
+	 * 2. We used perf_aux_output_skip to consume handle->size bytes
+	 *    and CIRC_SPACE was used to compute the size, which always
+	 *    leaves one entry free.
+	 *
+	 * Deal with this by padding to the next alignment boundary and
+	 * moving the head index. If we run out of buffer space, we'll
+	 * reduce handle->size to zero and end up reporting truncation.
+	 */
+	head = PERF_IDX2OFF(handle->head, buf);
+	if (!IS_ALIGNED(head, spe_pmu->align)) {
+		unsigned long delta = roundup(head, spe_pmu->align) - head;
+
+		delta = min(delta, handle->size);
+		arm_spe_pmu_pad_buf(handle, delta);
+		head = PERF_IDX2OFF(handle->head, buf);
+	}
+
+	/* If we've run out of free space, then nothing more to do */
+	if (!handle->size)
+		goto no_space;
+
+	/* Compute the tail and wakeup indices now that we've aligned head */
+	tail = PERF_IDX2OFF(handle->head + handle->size, buf);
+	wakeup = PERF_IDX2OFF(handle->wakeup, buf);
+
+	/*
+	 * Avoid clobbering unconsumed data. We know we have space, so
+	 * if we see head == tail we know that the buffer is empty. If
+	 * head > tail, then there's nothing to clobber prior to
+	 * wrapping.
+	 */
+	if (head < tail)
+		limit = round_down(tail, PAGE_SIZE);
+
+	/*
+	 * Wakeup may be arbitrarily far into the future. If it's not in
+	 * the current generation, either we'll wrap before hitting it,
+	 * or it's in the past and has been handled already.
+	 *
+	 * If there's a wakeup before we wrap, arrange to be woken up by
+	 * the page boundary following it. Keep the tail boundary if
+	 * that's lower.
+	 */
+	if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
+		limit = min(limit, round_up(wakeup, PAGE_SIZE));
+
+	if (limit > head)
+		return limit;
+
+	arm_spe_pmu_pad_buf(handle, handle->size);
+no_space:
+	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
+	perf_aux_output_end(handle, 0);
+	return 0;
+}
+
+static u64 arm_spe_pmu_next_off(struct perf_output_handle *handle)
+{
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(handle->event->pmu);
+	u64 limit = __arm_spe_pmu_next_off(handle);
+	u64 head = PERF_IDX2OFF(handle->head, buf);
+
+	/*
+	 * If the head has come too close to the end of the buffer,
+	 * then pad to the end and recompute the limit.
+	 */
+	if (limit && (limit - head < spe_pmu->max_record_sz)) {
+		arm_spe_pmu_pad_buf(handle, limit - head);
+		limit = __arm_spe_pmu_next_off(handle);
+	}
+
+	return limit;
+}
+
+static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
+					  struct perf_event *event)
+{
+	u64 base, limit;
+	struct arm_spe_pmu_buf *buf;
+
+	/* Start a new aux session */
+	buf = perf_aux_output_begin(handle, event);
+	if (!buf) {
+		event->hw.state |= PERF_HES_STOPPED;
+		/*
+		 * We still need to clear the limit pointer, since the
+		 * profiler might only be disabled by virtue of a fault.
+		 */
+		limit = 0;
+		goto out_write_limit;
+	}
+
+	limit = buf->snapshot ? arm_spe_pmu_next_snapshot_off(handle)
+			      : arm_spe_pmu_next_off(handle);
+	if (limit)
+		limit |= PMBLIMITR_EL1_E;
+
+	limit += (u64)buf->base;
+	base = (u64)buf->base + PERF_IDX2OFF(handle->head, buf);
+	write_sysreg_s(base, SYS_PMBPTR_EL1);
+
+out_write_limit:
+	write_sysreg_s(limit, SYS_PMBLIMITR_EL1);
+}
+
+static void arm_spe_perf_aux_output_end(struct perf_output_handle *handle)
+{
+	struct arm_spe_pmu_buf *buf = perf_get_aux(handle);
+	u64 offset, size;
+
+	offset = read_sysreg_s(SYS_PMBPTR_EL1) - (u64)buf->base;
+	size = offset - PERF_IDX2OFF(handle->head, buf);
+
+	if (buf->snapshot)
+		handle->head = offset;
+
+	perf_aux_output_end(handle, size);
+}
+
+static void arm_spe_pmu_disable_and_drain_local(void)
+{
+	/* Disable profiling at EL0 and EL1 */
+	write_sysreg_s(0, SYS_PMSCR_EL1);
+	isb();
+
+	/* Drain any buffered data */
+	psb_csync();
+	dsb(nsh);
+
+	/* Disable the profiling buffer */
+	write_sysreg_s(0, SYS_PMBLIMITR_EL1);
+	isb();
+}
+
+/* IRQ handling */
+static enum arm_spe_pmu_buf_fault_action
+arm_spe_pmu_buf_get_fault_act(struct perf_output_handle *handle)
+{
+	const char *err_str;
+	u64 pmbsr;
+	enum arm_spe_pmu_buf_fault_action ret;
+
+	/*
+	 * Ensure new profiling data is visible to the CPU and any external
+	 * aborts have been resolved.
+	 */
+	psb_csync();
+	dsb(nsh);
+
+	/* Ensure hardware updates to PMBPTR_EL1 are visible */
+	isb();
+
+	/* Service required? */
+	pmbsr = read_sysreg_s(SYS_PMBSR_EL1);
+	if (!FIELD_GET(PMBSR_EL1_S, pmbsr))
+		return SPE_PMU_BUF_FAULT_ACT_SPURIOUS;
+
+	/*
+	 * If we've lost data, disable profiling and also set the PARTIAL
+	 * flag to indicate that the last record is corrupted.
+	 */
+	if (FIELD_GET(PMBSR_EL1_DL, pmbsr))
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED |
+					     PERF_AUX_FLAG_PARTIAL);
+
+	/* Report collisions to userspace so that it can up the period */
+	if (FIELD_GET(PMBSR_EL1_COLL, pmbsr))
+		perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
+
+	/* We only expect buffer management events */
+	switch (FIELD_GET(PMBSR_EL1_EC, pmbsr)) {
+	case PMBSR_EL1_EC_BUF:
+		/* Handled below */
+		break;
+	case PMBSR_EL1_EC_FAULT_S1:
+	case PMBSR_EL1_EC_FAULT_S2:
+		err_str = "Unexpected buffer fault";
+		goto out_err;
+	default:
+		err_str = "Unknown error code";
+		goto out_err;
+	}
+
+	/* Buffer management event */
+	switch (FIELD_GET(PMBSR_EL1_BUF_BSC_MASK, pmbsr)) {
+	case PMBSR_EL1_BUF_BSC_FULL:
+		ret = SPE_PMU_BUF_FAULT_ACT_OK;
+		goto out_stop;
+	default:
+		err_str = "Unknown buffer status code";
+	}
+
+out_err:
+	pr_err_ratelimited("%s on CPU %d [PMBSR=0x%016llx, PMBPTR=0x%016llx, PMBLIMITR=0x%016llx]\n",
+			   err_str, smp_processor_id(), pmbsr,
+			   read_sysreg_s(SYS_PMBPTR_EL1),
+			   read_sysreg_s(SYS_PMBLIMITR_EL1));
+	ret = SPE_PMU_BUF_FAULT_ACT_FATAL;
+
+out_stop:
+	arm_spe_perf_aux_output_end(handle);
+	return ret;
+}
+
+static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
+{
+	struct perf_output_handle *handle = dev;
+	struct perf_event *event = handle->event;
+	enum arm_spe_pmu_buf_fault_action act;
+
+	if (!perf_get_aux(handle))
+		return IRQ_NONE;
+
+	act = arm_spe_pmu_buf_get_fault_act(handle);
+	if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
+		return IRQ_NONE;
+
+	/*
+	 * Ensure perf callbacks have completed, which may disable the
+	 * profiling buffer in response to a TRUNCATION flag.
+	 */
+	irq_work_run();
+
+	switch (act) {
+	case SPE_PMU_BUF_FAULT_ACT_FATAL:
+		/*
+		 * If a fatal exception occurred then leaving the profiling
+		 * buffer enabled is a recipe waiting to happen. Since
+		 * fatal faults don't always imply truncation, make sure
+		 * that the profiling buffer is disabled explicitly before
+		 * clearing the syndrome register.
+		 */
+		arm_spe_pmu_disable_and_drain_local();
+		break;
+	case SPE_PMU_BUF_FAULT_ACT_OK:
+		/*
+		 * We handled the fault (the buffer was full), so resume
+		 * profiling as long as we didn't detect truncation.
+		 * PMBPTR might be misaligned, but we'll burn that bridge
+		 * when we get to it.
+		 */
+		if (!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)) {
+			arm_spe_perf_aux_output_begin(handle, event);
+			isb();
+		}
+		break;
+	case SPE_PMU_BUF_FAULT_ACT_SPURIOUS:
+		/* We've seen you before, but GCC has the memory of a sieve. */
+		break;
+	}
+
+	/* The buffer pointers are now sane, so resume profiling. */
+	write_sysreg_s(0, SYS_PMBSR_EL1);
+	return IRQ_HANDLED;
+}
+
+static u64 arm_spe_pmsevfr_res0(u16 pmsver)
+{
+	switch (pmsver) {
+	case ID_AA64DFR0_EL1_PMSVer_IMP:
+		return PMSEVFR_EL1_RES0_IMP;
+	case ID_AA64DFR0_EL1_PMSVer_V1P1:
+		return PMSEVFR_EL1_RES0_V1P1;
+	case ID_AA64DFR0_EL1_PMSVer_V1P2:
+	/* Return the highest version we support in default */
+	default:
+		return PMSEVFR_EL1_RES0_V1P2;
+	}
+}
+
+/* Perf callbacks */
+static int arm_spe_pmu_event_init(struct perf_event *event)
+{
+	u64 reg;
+	struct perf_event_attr *attr = &event->attr;
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+
+	/* This is, of course, deeply driver-specific */
+	if (attr->type != event->pmu->type)
+		return -ENOENT;
+
+	if (event->cpu >= 0 &&
+	    !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus))
+		return -ENOENT;
+
+	if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
+		return -EOPNOTSUPP;
+
+	if (arm_spe_event_to_pmsnevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver))
+		return -EOPNOTSUPP;
+
+	if (attr->exclude_idle)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Feedback-directed frequency throttling doesn't work when we
+	 * have a buffer of samples. We'd need to manually count the
+	 * samples in the buffer when it fills up and adjust the event
+	 * count to reflect that. Instead, just force the user to specify
+	 * a sample period.
+	 */
+	if (attr->freq)
+		return -EINVAL;
+
+	reg = arm_spe_event_to_pmsfcr(event);
+	if ((FIELD_GET(PMSFCR_EL1_FE, reg)) &&
+	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_EVT))
+		return -EOPNOTSUPP;
+
+	if ((FIELD_GET(PMSFCR_EL1_FnE, reg)) &&
+	    !(spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT))
+		return -EOPNOTSUPP;
+
+	if ((FIELD_GET(PMSFCR_EL1_FT, reg)) &&
+	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_TYP))
+		return -EOPNOTSUPP;
+
+	if ((FIELD_GET(PMSFCR_EL1_FL, reg)) &&
+	    !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
+		return -EOPNOTSUPP;
+
+	set_spe_event_has_cx(event);
+	reg = arm_spe_event_to_pmscr(event);
+	if (!perfmon_capable() &&
+	    (reg & (PMSCR_EL1_PA | PMSCR_EL1_PCT)))
+		return -EACCES;
+
+	return 0;
+}
+
+static void arm_spe_pmu_start(struct perf_event *event, int flags)
+{
+	u64 reg;
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);
+
+	hwc->state = 0;
+	arm_spe_perf_aux_output_begin(handle, event);
+	if (hwc->state)
+		return;
+
+	reg = arm_spe_event_to_pmsfcr(event);
+	write_sysreg_s(reg, SYS_PMSFCR_EL1);
+
+	reg = arm_spe_event_to_pmsevfr(event);
+	write_sysreg_s(reg, SYS_PMSEVFR_EL1);
+
+	if (spe_pmu->features & SPE_PMU_FEAT_INV_FILT_EVT) {
+		reg = arm_spe_event_to_pmsnevfr(event);
+		write_sysreg_s(reg, SYS_PMSNEVFR_EL1);
+	}
+
+	reg = arm_spe_event_to_pmslatfr(event);
+	write_sysreg_s(reg, SYS_PMSLATFR_EL1);
+
+	if (flags & PERF_EF_RELOAD) {
+		reg = arm_spe_event_to_pmsirr(event);
+		write_sysreg_s(reg, SYS_PMSIRR_EL1);
+		isb();
+		reg = local64_read(&hwc->period_left);
+		write_sysreg_s(reg, SYS_PMSICR_EL1);
+	}
+
+	reg = arm_spe_event_to_pmscr(event);
+	isb();
+	write_sysreg_s(reg, SYS_PMSCR_EL1);
+}
+
+static void arm_spe_pmu_stop(struct perf_event *event, int flags)
+{
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);
+
+	/* If we're already stopped, then nothing to do */
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	/* Stop all trace generation */
+	arm_spe_pmu_disable_and_drain_local();
+
+	if (flags & PERF_EF_UPDATE) {
+		/*
+		 * If there's a fault pending then ensure we contain it
+		 * to this buffer, since we might be on the context-switch
+		 * path.
+		 */
+		if (perf_get_aux(handle)) {
+			enum arm_spe_pmu_buf_fault_action act;
+
+			act = arm_spe_pmu_buf_get_fault_act(handle);
+			if (act == SPE_PMU_BUF_FAULT_ACT_SPURIOUS)
+				arm_spe_perf_aux_output_end(handle);
+			else
+				write_sysreg_s(0, SYS_PMBSR_EL1);
+		}
+
+		/*
+		 * This may also contain ECOUNT, but nobody else should
+		 * be looking at period_left, since we forbid frequency
+		 * based sampling.
+		 */
+		local64_set(&hwc->period_left, read_sysreg_s(SYS_PMSICR_EL1));
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+static int arm_spe_pmu_add(struct perf_event *event, int flags)
+{
+	int ret = 0;
+	struct arm_spe_pmu *spe_pmu = to_spe_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int cpu = event->cpu == -1 ? smp_processor_id() : event->cpu;
+
+	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
+		return -ENOENT;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START) {
+		arm_spe_pmu_start(event, PERF_EF_RELOAD);
+		if (hwc->state & PERF_HES_STOPPED)
+			ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void arm_spe_pmu_del(struct perf_event *event, int flags)
+{
+	arm_spe_pmu_stop(event, PERF_EF_UPDATE);
+}
+
+static void arm_spe_pmu_read(struct perf_event *event)
+{
+}
+
+static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages,
+				   int nr_pages, bool snapshot)
+{
+	int i, cpu = event->cpu;
+	struct page **pglist;
+	struct arm_spe_pmu_buf *buf;
+
+	/* We need at least two pages for this to work. */
+	if (nr_pages < 2)
+		return NULL;
+
+	/*
+	 * We require an even number of pages for snapshot mode, so that
+	 * we can effectively treat the buffer as consisting of two equal
+	 * parts and give userspace a fighting chance of getting some
+	 * useful data out of it.
+	 */
+	if (snapshot && (nr_pages & 1))
+		return NULL;
+
+	if (cpu == -1)
+		cpu = raw_smp_processor_id();
+
+	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
+	if (!buf)
+		return NULL;
+
+	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
+	if (!pglist)
+		goto out_free_buf;
+
+	for (i = 0; i < nr_pages; ++i)
+		pglist[i] = virt_to_page(pages[i]);
+
+	buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!buf->base)
+		goto out_free_pglist;
+
+	buf->nr_pages	= nr_pages;
+	buf->snapshot	= snapshot;
+
+	kfree(pglist);
+	return buf;
+
+out_free_pglist:
+	kfree(pglist);
+out_free_buf:
+	kfree(buf);
+	return NULL;
+}
+
+static void arm_spe_pmu_free_aux(void *aux)
+{
+	struct arm_spe_pmu_buf *buf = aux;
+
+	vunmap(buf->base);
+	kfree(buf);
+}
+
+/* Initialisation and teardown functions */
+static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
+{
+	static atomic_t pmu_idx = ATOMIC_INIT(-1);
+
+	int idx;
+	char *name;
+	struct device *dev = &spe_pmu->pdev->dev;
+
+	spe_pmu->pmu = (struct pmu) {
+		.module = THIS_MODULE,
+		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
+		.attr_groups	= arm_spe_pmu_attr_groups,
+		/*
+		 * We hitch a ride on the software context here, so that
+		 * we can support per-task profiling (which is not possible
+		 * with the invalid context as it doesn't get sched callbacks).
+		 * This requires that userspace either uses a dummy event for
+		 * perf_event_open, since the aux buffer is not setup until
+		 * a subsequent mmap, or creates the profiling event in a
+		 * disabled state and explicitly PERF_EVENT_IOC_ENABLEs it
+		 * once the buffer has been created.
+		 */
+		.task_ctx_nr	= perf_sw_context,
+		.event_init	= arm_spe_pmu_event_init,
+		.add		= arm_spe_pmu_add,
+		.del		= arm_spe_pmu_del,
+		.start		= arm_spe_pmu_start,
+		.stop		= arm_spe_pmu_stop,
+		.read		= arm_spe_pmu_read,
+		.setup_aux	= arm_spe_pmu_setup_aux,
+		.free_aux	= arm_spe_pmu_free_aux,
+	};
+
+	idx = atomic_inc_return(&pmu_idx);
+	name = devm_kasprintf(dev, GFP_KERNEL, "%s_%d", PMUNAME, idx);
+	if (!name) {
+		dev_err(dev, "failed to allocate name for pmu %d\n", idx);
+		return -ENOMEM;
+	}
+
+	return perf_pmu_register(&spe_pmu->pmu, name, -1);
+}
+
+static void arm_spe_pmu_perf_destroy(struct arm_spe_pmu *spe_pmu)
+{
+	perf_pmu_unregister(&spe_pmu->pmu);
+}
+
+static void __arm_spe_pmu_dev_probe(void *info)
+{
+	int fld;
+	u64 reg;
+	struct arm_spe_pmu *spe_pmu = info;
+	struct device *dev = &spe_pmu->pdev->dev;
+
+	fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1),
+						   ID_AA64DFR0_EL1_PMSVer_SHIFT);
+	if (!fld) {
+		dev_err(dev,
+			"unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n",
+			fld, smp_processor_id());
+		return;
+	}
+	spe_pmu->pmsver = (u16)fld;
+
+	/* Read PMBIDR first to determine whether or not we have access */
+	reg = read_sysreg_s(SYS_PMBIDR_EL1);
+	if (FIELD_GET(PMBIDR_EL1_P, reg)) {
+		dev_err(dev,
+			"profiling buffer owned by higher exception level\n");
+		return;
+	}
+
+	/* Minimum alignment. If it's out-of-range, then fail the probe */
+	fld = FIELD_GET(PMBIDR_EL1_ALIGN, reg);
+	spe_pmu->align = 1 << fld;
+	if (spe_pmu->align > SZ_2K) {
+		dev_err(dev, "unsupported PMBIDR.Align [%d] on CPU %d\n",
+			fld, smp_processor_id());
+		return;
+	}
+
+	/* It's now safe to read PMSIDR and figure out what we've got */
+	reg = read_sysreg_s(SYS_PMSIDR_EL1);
+	if (FIELD_GET(PMSIDR_EL1_FE, reg))
+		spe_pmu->features |= SPE_PMU_FEAT_FILT_EVT;
+
+	if (FIELD_GET(PMSIDR_EL1_FnE, reg))
+		spe_pmu->features |= SPE_PMU_FEAT_INV_FILT_EVT;
+
+	if (FIELD_GET(PMSIDR_EL1_FT, reg))
+		spe_pmu->features |= SPE_PMU_FEAT_FILT_TYP;
+
+	if (FIELD_GET(PMSIDR_EL1_FL, reg))
+		spe_pmu->features |= SPE_PMU_FEAT_FILT_LAT;
+
+	if (FIELD_GET(PMSIDR_EL1_ARCHINST, reg))
+		spe_pmu->features |= SPE_PMU_FEAT_ARCH_INST;
+
+	if (FIELD_GET(PMSIDR_EL1_LDS, reg))
+		spe_pmu->features |= SPE_PMU_FEAT_LDS;
+
+	if (FIELD_GET(PMSIDR_EL1_ERND, reg))
+		spe_pmu->features |= SPE_PMU_FEAT_ERND;
+
+	/* This field has a spaced out encoding, so just use a look-up */
+	fld = FIELD_GET(PMSIDR_EL1_INTERVAL, reg);
+	switch (fld) {
+	case PMSIDR_EL1_INTERVAL_256:
+		spe_pmu->min_period = 256;
+		break;
+	case PMSIDR_EL1_INTERVAL_512:
+		spe_pmu->min_period = 512;
+		break;
+	case PMSIDR_EL1_INTERVAL_768:
+		spe_pmu->min_period = 768;
+		break;
+	case PMSIDR_EL1_INTERVAL_1024:
+		spe_pmu->min_period = 1024;
+		break;
+	case PMSIDR_EL1_INTERVAL_1536:
+		spe_pmu->min_period = 1536;
+		break;
+	case PMSIDR_EL1_INTERVAL_2048:
+		spe_pmu->min_period = 2048;
+		break;
+	case PMSIDR_EL1_INTERVAL_3072:
+		spe_pmu->min_period = 3072;
+		break;
+	default:
+		dev_warn(dev, "unknown PMSIDR_EL1.Interval [%d]; assuming 8\n",
+			 fld);
+		fallthrough;
+	case PMSIDR_EL1_INTERVAL_4096:
+		spe_pmu->min_period = 4096;
+	}
+
+	/* Maximum record size. If it's out-of-range, then fail the probe */
+	fld = FIELD_GET(PMSIDR_EL1_MAXSIZE, reg);
+	spe_pmu->max_record_sz = 1 << fld;
+	if (spe_pmu->max_record_sz > SZ_2K || spe_pmu->max_record_sz < 16) {
+		dev_err(dev, "unsupported PMSIDR_EL1.MaxSize [%d] on CPU %d\n",
+			fld, smp_processor_id());
+		return;
+	}
+
+	fld = FIELD_GET(PMSIDR_EL1_COUNTSIZE, reg);
+	switch (fld) {
+	default:
+		dev_warn(dev, "unknown PMSIDR_EL1.CountSize [%d]; assuming 2\n",
+			 fld);
+		fallthrough;
+	case PMSIDR_EL1_COUNTSIZE_12_BIT_SAT:
+		spe_pmu->counter_sz = 12;
+		break;
+	case PMSIDR_EL1_COUNTSIZE_16_BIT_SAT:
+		spe_pmu->counter_sz = 16;
+	}
+
+	dev_info(dev,
+		 "probed SPEv1.%d for CPUs %*pbl [max_record_sz %u, align %u, features 0x%llx]\n",
+		 spe_pmu->pmsver - 1, cpumask_pr_args(&spe_pmu->supported_cpus),
+		 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);
+
+	spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
+}
+
+static void __arm_spe_pmu_reset_local(void)
+{
+	/*
+	 * This is probably overkill, as we have no idea where we're
+	 * draining any buffered data to...
+	 */
+	arm_spe_pmu_disable_and_drain_local();
+
+	/* Reset the buffer base pointer */
+	write_sysreg_s(0, SYS_PMBPTR_EL1);
+	isb();
+
+	/* Clear any pending management interrupts */
+	write_sysreg_s(0, SYS_PMBSR_EL1);
+	isb();
+}
+
+static void __arm_spe_pmu_setup_one(void *info)
+{
+	struct arm_spe_pmu *spe_pmu = info;
+
+	__arm_spe_pmu_reset_local();
+	enable_percpu_irq(spe_pmu->irq, IRQ_TYPE_NONE);
+}
+
+static void __arm_spe_pmu_stop_one(void *info)
+{
+	struct arm_spe_pmu *spe_pmu = info;
+
+	disable_percpu_irq(spe_pmu->irq);
+	__arm_spe_pmu_reset_local();
+}
+
+static int arm_spe_pmu_cpu_startup(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_spe_pmu *spe_pmu;
+
+	spe_pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
+	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
+		return 0;
+
+	__arm_spe_pmu_setup_one(spe_pmu);
+	return 0;
+}
+
+static int arm_spe_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_spe_pmu *spe_pmu;
+
+	spe_pmu = hlist_entry_safe(node, struct arm_spe_pmu, hotplug_node);
+	if (!cpumask_test_cpu(cpu, &spe_pmu->supported_cpus))
+		return 0;
+
+	__arm_spe_pmu_stop_one(spe_pmu);
+	return 0;
+}
+
+static int arm_spe_pmu_dev_init(struct arm_spe_pmu *spe_pmu)
+{
+	int ret;
+	cpumask_t *mask = &spe_pmu->supported_cpus;
+
+	/* Make sure we probe the hardware on a relevant CPU */
+	ret = smp_call_function_any(mask,  __arm_spe_pmu_dev_probe, spe_pmu, 1);
+	if (ret || !(spe_pmu->features & SPE_PMU_FEAT_DEV_PROBED))
+		return -ENXIO;
+
+	/* Request our PPIs (note that the IRQ is still disabled) */
+	ret = request_percpu_irq(spe_pmu->irq, arm_spe_pmu_irq_handler, DRVNAME,
+				 spe_pmu->handle);
+	if (ret)
+		return ret;
+
+	/*
+	 * Register our hotplug notifier now so we don't miss any events.
+	 * This will enable the IRQ for any supported CPUs that are already
+	 * up.
+	 */
+	ret = cpuhp_state_add_instance(arm_spe_pmu_online,
+				       &spe_pmu->hotplug_node);
+	if (ret)
+		free_percpu_irq(spe_pmu->irq, spe_pmu->handle);
+
+	return ret;
+}
+
+static void arm_spe_pmu_dev_teardown(struct arm_spe_pmu *spe_pmu)
+{
+	cpuhp_state_remove_instance(arm_spe_pmu_online, &spe_pmu->hotplug_node);
+	free_percpu_irq(spe_pmu->irq, spe_pmu->handle);
+}
+
+/* Driver and device probing */
+static int arm_spe_pmu_irq_probe(struct arm_spe_pmu *spe_pmu)
+{
+	struct platform_device *pdev = spe_pmu->pdev;
+	int irq = platform_get_irq(pdev, 0);
+
+	if (irq < 0)
+		return -ENXIO;
+
+	if (!irq_is_percpu(irq)) {
+		dev_err(&pdev->dev, "expected PPI but got SPI (%d)\n", irq);
+		return -EINVAL;
+	}
+
+	if (irq_get_percpu_devid_partition(irq, &spe_pmu->supported_cpus)) {
+		dev_err(&pdev->dev, "failed to get PPI partition (%d)\n", irq);
+		return -EINVAL;
+	}
+
+	spe_pmu->irq = irq;
+	return 0;
+}
+
+static const struct of_device_id arm_spe_pmu_of_match[] = {
+	{ .compatible = "arm,statistical-profiling-extension-v1", .data = (void *)1 },
+	{ /* Sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match);
+
+static const struct platform_device_id arm_spe_match[] = {
+	{ ARMV8_SPE_PDEV_NAME, 0},
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, arm_spe_match);
+
+static int arm_spe_pmu_device_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct arm_spe_pmu *spe_pmu;
+	struct device *dev = &pdev->dev;
+
+	/*
+	 * If kernelspace is unmapped when running at EL0, then the SPE
+	 * buffer will fault and prematurely terminate the AUX session.
+	 */
+	if (arm64_kernel_unmapped_at_el0()) {
+		dev_warn_once(dev, "profiling buffer inaccessible. Try passing \"kpti=off\" on the kernel command line\n");
+		return -EPERM;
+	}
+
+	spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
+	if (!spe_pmu)
+		return -ENOMEM;
+
+	spe_pmu->handle = alloc_percpu(typeof(*spe_pmu->handle));
+	if (!spe_pmu->handle)
+		return -ENOMEM;
+
+	spe_pmu->pdev = pdev;
+	platform_set_drvdata(pdev, spe_pmu);
+
+	ret = arm_spe_pmu_irq_probe(spe_pmu);
+	if (ret)
+		goto out_free_handle;
+
+	ret = arm_spe_pmu_dev_init(spe_pmu);
+	if (ret)
+		goto out_free_handle;
+
+	ret = arm_spe_pmu_perf_init(spe_pmu);
+	if (ret)
+		goto out_teardown_dev;
+
+	return 0;
+
+out_teardown_dev:
+	arm_spe_pmu_dev_teardown(spe_pmu);
+out_free_handle:
+	free_percpu(spe_pmu->handle);
+	return ret;
+}
+
+static int arm_spe_pmu_device_remove(struct platform_device *pdev)
+{
+	struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+
+	arm_spe_pmu_perf_destroy(spe_pmu);
+	arm_spe_pmu_dev_teardown(spe_pmu);
+	free_percpu(spe_pmu->handle);
+	return 0;
+}
+
+static struct platform_driver arm_spe_pmu_driver = {
+	.id_table = arm_spe_match,
+	.driver	= {
+		.name		= DRVNAME,
+		.of_match_table	= of_match_ptr(arm_spe_pmu_of_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe	= arm_spe_pmu_device_probe,
+	.remove	= arm_spe_pmu_device_remove,
+};
+
+static int __init arm_spe_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
+				      arm_spe_pmu_cpu_startup,
+				      arm_spe_pmu_cpu_teardown);
+	if (ret < 0)
+		return ret;
+	arm_spe_pmu_online = ret;
+
+	ret = platform_driver_register(&arm_spe_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(arm_spe_pmu_online);
+
+	return ret;
+}
+
+static void __exit arm_spe_pmu_exit(void)
+{
+	platform_driver_unregister(&arm_spe_pmu_driver);
+	cpuhp_remove_multi_state(arm_spe_pmu_online);
+}
+
+module_init(arm_spe_pmu_init);
+module_exit(arm_spe_pmu_exit);
+
+MODULE_DESCRIPTION("Perf driver for the ARMv8.2 Statistical Profiling Extension");
+MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c
new file mode 100644
index 0000000000..365d964b0f
--- /dev/null
+++ b/drivers/perf/cxl_pmu.c
@@ -0,0 +1,990 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright(c) 2023 Huawei
+ *
+ * The CXL 3.0 specification includes a standard Performance Monitoring Unit,
+ * called the CXL PMU, or CPMU. In order to allow a high degree of
+ * implementation flexibility the specification provides a wide range of
+ * options all of which are self describing.
+ *
+ * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface
+ */
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/perf_event.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/bits.h>
+#include <linux/list.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+
+#include "../cxl/cxlpci.h"
+#include "../cxl/cxl.h"
+#include "../cxl/pmu.h"
+
+#define CXL_PMU_CAP_REG			0x0
+#define   CXL_PMU_CAP_NUM_COUNTERS_MSK			GENMASK_ULL(5, 0)
+#define   CXL_PMU_CAP_COUNTER_WIDTH_MSK			GENMASK_ULL(15, 8)
+#define   CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK		GENMASK_ULL(24, 20)
+#define   CXL_PMU_CAP_FILTERS_SUP_MSK			GENMASK_ULL(39, 32)
+#define     CXL_PMU_FILTER_HDM				BIT(0)
+#define     CXL_PMU_FILTER_CHAN_RANK_BANK		BIT(1)
+#define   CXL_PMU_CAP_MSI_N_MSK				GENMASK_ULL(47, 44)
+#define   CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN		BIT_ULL(48)
+#define   CXL_PMU_CAP_FREEZE				BIT_ULL(49)
+#define   CXL_PMU_CAP_INT				BIT_ULL(50)
+#define   CXL_PMU_CAP_VERSION_MSK			GENMASK_ULL(63, 60)
+
+#define CXL_PMU_OVERFLOW_REG		0x10
+#define CXL_PMU_FREEZE_REG		0x18
+#define CXL_PMU_EVENT_CAP_REG(n)	(0x100 + 8 * (n))
+#define   CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK	GENMASK_ULL(31, 0)
+#define   CXL_PMU_EVENT_CAP_GROUP_ID_MSK		GENMASK_ULL(47, 32)
+#define   CXL_PMU_EVENT_CAP_VENDOR_ID_MSK		GENMASK_ULL(63, 48)
+
+#define CXL_PMU_COUNTER_CFG_REG(n)	(0x200 + 8 * (n))
+#define   CXL_PMU_COUNTER_CFG_TYPE_MSK			GENMASK_ULL(1, 0)
+#define     CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN		0
+#define     CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN		1
+#define     CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE	2
+#define   CXL_PMU_COUNTER_CFG_ENABLE			BIT_ULL(8)
+#define   CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW		BIT_ULL(9)
+#define   CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW		BIT_ULL(10)
+#define   CXL_PMU_COUNTER_CFG_EDGE			BIT_ULL(11)
+#define   CXL_PMU_COUNTER_CFG_INVERT			BIT_ULL(12)
+#define   CXL_PMU_COUNTER_CFG_THRESHOLD_MSK		GENMASK_ULL(23, 16)
+#define   CXL_PMU_COUNTER_CFG_EVENTS_MSK		GENMASK_ULL(55, 24)
+#define   CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK	GENMASK_ULL(63, 59)
+
+#define CXL_PMU_FILTER_CFG_REG(n, f)	(0x400 + 4 * ((f) + (n) * 8))
+#define   CXL_PMU_FILTER_CFG_VALUE_MSK			GENMASK(15, 0)
+
+#define CXL_PMU_COUNTER_REG(n)		(0xc00 + 8 * (n))
+
+/* CXL rev 3.0 Table 13-5 Events under CXL Vendor ID */
+#define CXL_PMU_GID_CLOCK_TICKS		0x00
+#define CXL_PMU_GID_D2H_REQ		0x0010
+#define CXL_PMU_GID_D2H_RSP		0x0011
+#define CXL_PMU_GID_H2D_REQ		0x0012
+#define CXL_PMU_GID_H2D_RSP		0x0013
+#define CXL_PMU_GID_CACHE_DATA		0x0014
+#define CXL_PMU_GID_M2S_REQ		0x0020
+#define CXL_PMU_GID_M2S_RWD		0x0021
+#define CXL_PMU_GID_M2S_BIRSP		0x0022
+#define CXL_PMU_GID_S2M_BISNP		0x0023
+#define CXL_PMU_GID_S2M_NDR		0x0024
+#define CXL_PMU_GID_S2M_DRS		0x0025
+#define CXL_PMU_GID_DDR			0x8000
+
+static int cxl_pmu_cpuhp_state_num;
+
+struct cxl_pmu_ev_cap {
+	u16 vid;
+	u16 gid;
+	u32 msk;
+	union {
+		int counter_idx; /* fixed counters */
+		int event_idx; /* configurable counters */
+	};
+	struct list_head node;
+};
+
+#define CXL_PMU_MAX_COUNTERS 64
+struct cxl_pmu_info {
+	struct pmu pmu;
+	void __iomem *base;
+	struct perf_event **hw_events;
+	struct list_head event_caps_configurable;
+	struct list_head event_caps_fixed;
+	DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS);
+	DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS);
+	u16 counter_width;
+	u8 num_counters;
+	u8 num_event_capabilities;
+	int on_cpu;
+	struct hlist_node node;
+	bool filter_hdm;
+	int irq;
+};
+
+#define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu)
+
+/*
+ * All CPMU counters are discoverable via the Event Capabilities Registers.
+ * Each Event Capability register contains a a VID / GroupID.
+ * A counter may then count any combination (by summing) of events in
+ * that group which are in the Supported Events Bitmask.
+ * However, there are some complexities to the scheme.
+ *  - Fixed function counters refer to an Event Capabilities register.
+ *    That event capability register is not then used for Configurable
+ *    counters.
+ */
+static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info)
+{
+	unsigned long fixed_counter_event_cap_bm = 0;
+	void __iomem *base = info->base;
+	bool freeze_for_enable;
+	u64 val, eval;
+	int i;
+
+	val = readq(base + CXL_PMU_CAP_REG);
+	freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) &&
+		FIELD_GET(CXL_PMU_CAP_FREEZE, val);
+	if (!freeze_for_enable) {
+		dev_err(dev, "Counters not writable while frozen\n");
+		return -ENODEV;
+	}
+
+	info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1;
+	info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val);
+	info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1;
+
+	info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM;
+	if (FIELD_GET(CXL_PMU_CAP_INT, val))
+		info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val);
+	else
+		info->irq = -1;
+
+	/* First handle fixed function counters; note if configurable counters found */
+	for (i = 0; i < info->num_counters; i++) {
+		struct cxl_pmu_ev_cap *pmu_ev;
+		u32 events_msk;
+		u8 group_idx;
+
+		val = readq(base + CXL_PMU_COUNTER_CFG_REG(i));
+
+		if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) ==
+			CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) {
+			set_bit(i, info->conf_counter_bm);
+		}
+
+		if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) !=
+		    CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN)
+			continue;
+
+		/* In this case we know which fields are const */
+		group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val);
+		events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val);
+		eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx));
+		pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
+		if (!pmu_ev)
+			return -ENOMEM;
+
+		pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
+		pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
+		/* For a fixed purpose counter use the events mask from the counter CFG */
+		pmu_ev->msk = events_msk;
+		pmu_ev->counter_idx = i;
+		/* This list add is never unwound as all entries deleted on remove */
+		list_add(&pmu_ev->node, &info->event_caps_fixed);
+		/*
+		 * Configurable counters must not use an Event Capability registers that
+		 * is in use for a Fixed counter
+		 */
+		set_bit(group_idx, &fixed_counter_event_cap_bm);
+	}
+
+	if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) {
+		struct cxl_pmu_ev_cap *pmu_ev;
+		int j;
+		/* Walk event capabilities unused by fixed counters */
+		for_each_clear_bit(j, &fixed_counter_event_cap_bm,
+				   info->num_event_capabilities) {
+			pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
+			if (!pmu_ev)
+				return -ENOMEM;
+
+			eval = readq(base + CXL_PMU_EVENT_CAP_REG(j));
+			pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
+			pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
+			pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval);
+			pmu_ev->event_idx = j;
+			list_add(&pmu_ev->node, &info->event_caps_configurable);
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t cxl_pmu_format_sysfs_show(struct device *dev,
+					 struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+#define CXL_PMU_FORMAT_ATTR(_name, _format)\
+	(&((struct dev_ext_attribute[]) {					\
+		{								\
+			.attr = __ATTR(_name, 0444,				\
+				       cxl_pmu_format_sysfs_show, NULL),	\
+			.var = (void *)_format					\
+		}								\
+		})[0].attr.attr)
+
+enum {
+	cxl_pmu_mask_attr,
+	cxl_pmu_gid_attr,
+	cxl_pmu_vid_attr,
+	cxl_pmu_threshold_attr,
+	cxl_pmu_invert_attr,
+	cxl_pmu_edge_attr,
+	cxl_pmu_hdm_filter_en_attr,
+	cxl_pmu_hdm_attr,
+};
+
+static struct attribute *cxl_pmu_format_attr[] = {
+	[cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"),
+	[cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"),
+	[cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"),
+	[cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"),
+	[cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"),
+	[cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"),
+	[cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"),
+	[cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"),
+	NULL
+};
+
+#define CXL_PMU_ATTR_CONFIG_MASK_MSK		GENMASK_ULL(31, 0)
+#define CXL_PMU_ATTR_CONFIG_GID_MSK		GENMASK_ULL(47, 32)
+#define CXL_PMU_ATTR_CONFIG_VID_MSK		GENMASK_ULL(63, 48)
+#define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK	GENMASK_ULL(15, 0)
+#define CXL_PMU_ATTR_CONFIG1_INVERT_MSK		BIT(16)
+#define CXL_PMU_ATTR_CONFIG1_EDGE_MSK		BIT(17)
+#define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK	BIT(18)
+#define CXL_PMU_ATTR_CONFIG2_HDM_MSK		GENMASK(15, 0)
+
+static umode_t cxl_pmu_format_is_visible(struct kobject *kobj,
+					 struct attribute *attr, int a)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct cxl_pmu_info *info = dev_get_drvdata(dev);
+
+	/*
+	 * Filter capability at the CPMU level, so hide the attributes if the particular
+	 * filter is not supported.
+	 */
+	if (!info->filter_hdm &&
+	    (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] ||
+	     attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr]))
+		return 0;
+
+	return attr->mode;
+}
+
+static const struct attribute_group cxl_pmu_format_group = {
+	.name = "format",
+	.attrs = cxl_pmu_format_attr,
+	.is_visible = cxl_pmu_format_is_visible,
+};
+
+static u32 cxl_pmu_config_get_mask(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config);
+}
+
+static u16 cxl_pmu_config_get_gid(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config);
+}
+
+static u16 cxl_pmu_config_get_vid(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config);
+}
+
+static u8 cxl_pmu_config1_get_threshold(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1);
+}
+
+static bool cxl_pmu_config1_get_invert(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1);
+}
+
+static bool cxl_pmu_config1_get_edge(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1);
+}
+
+/*
+ * CPMU specification allows for 8 filters, each with a 16 bit value...
+ * So we need to find 8x16bits to store it in.
+ * As the value used for disable is 0xffff, a separate enable switch
+ * is needed.
+ */
+
+static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1);
+}
+
+static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event)
+{
+	return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2);
+}
+
+static ssize_t cxl_pmu_event_sysfs_show(struct device *dev,
+					struct device_attribute *attr, char *buf)
+{
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id);
+}
+
+#define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk)			\
+	PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show,		\
+			  ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk))
+
+/* For CXL spec defined events */
+#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk)			\
+	CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk)
+
+static struct attribute *cxl_pmu_event_attrs[] = {
+	CXL_PMU_EVENT_CXL_ATTR(clock_ticks,			CXL_PMU_GID_CLOCK_TICKS, BIT(0)),
+	/* CXL rev 3.0 Table 3-17 - Device to Host Requests */
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr,			CXL_PMU_GID_D2H_REQ, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown,			CXL_PMU_GID_D2H_REQ, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared,		CXL_PMU_GID_D2H_REQ, BIT(3)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany,			CXL_PMU_GID_D2H_REQ, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata,		CXL_PMU_GID_D2H_REQ, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr,			CXL_PMU_GID_D2H_REQ, BIT(6)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr,			CXL_PMU_GID_D2H_REQ, BIT(7)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush,			CXL_PMU_GID_D2H_REQ, BIT(8)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict,		CXL_PMU_GID_D2H_REQ, BIT(9)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict,		CXL_PMU_GID_D2H_REQ, BIT(10)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata,	CXL_PMU_GID_D2H_REQ, BIT(11)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv,			CXL_PMU_GID_D2H_REQ, BIT(12)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf,		CXL_PMU_GID_D2H_REQ, BIT(13)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv,			CXL_PMU_GID_D2H_REQ, BIT(14)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed,		CXL_PMU_GID_D2H_REQ, BIT(16)),
+	/* CXL rev 3.0 Table 3-20 - D2H Repsonse Encodings */
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti,		CXL_PMU_GID_D2H_RSP, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv,		CXL_PMU_GID_D2H_RSP, BIT(6)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse,		CXL_PMU_GID_D2H_RSP, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspshitse,		CXL_PMU_GID_D2H_RSP, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspsfwdm,		CXL_PMU_GID_D2H_RSP, BIT(7)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspifwdm,		CXL_PMU_GID_D2H_RSP, BIT(15)),
+	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvfwdv,		CXL_PMU_GID_D2H_RSP, BIT(22)),
+	/* CXL rev 3.0 Table 3-21 - CXL.cache - Mapping of H2D Requests to D2H Responses */
+	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpdata,			CXL_PMU_GID_H2D_REQ, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpinv,			CXL_PMU_GID_H2D_REQ, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpcur,			CXL_PMU_GID_H2D_REQ, BIT(3)),
+	/* CXL rev 3.0 Table 3-22 - H2D Response Opcode Encodings */
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_writepull,		CXL_PMU_GID_H2D_RSP, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_go,			CXL_PMU_GID_H2D_RSP, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepull,		CXL_PMU_GID_H2D_RSP, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_extcmp,			CXL_PMU_GID_H2D_RSP, BIT(6)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepulldrop,		CXL_PMU_GID_H2D_RSP, BIT(8)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_fastgowritepull,		CXL_PMU_GID_H2D_RSP, BIT(13)),
+	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_goerrwritepull,		CXL_PMU_GID_H2D_RSP, BIT(15)),
+	/* CXL rev 3.0 Table 13-5 directly lists these */
+	CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data,		CXL_PMU_GID_CACHE_DATA, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data,		CXL_PMU_GID_CACHE_DATA, BIT(1)),
+	/* CXL rev 3.0 Table 3-29 M2S Req Memory Opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv,			CXL_PMU_GID_M2S_REQ, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd,			CXL_PMU_GID_M2S_REQ, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata,		CXL_PMU_GID_M2S_REQ, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd,		CXL_PMU_GID_M2S_REQ, BIT(3)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd,		CXL_PMU_GID_M2S_REQ, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd,		CXL_PMU_GID_M2S_REQ, BIT(8)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt,		CXL_PMU_GID_M2S_REQ, BIT(9)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict,		CXL_PMU_GID_M2S_REQ, BIT(10)),
+	/* CXL rev 3.0 Table 3-35 M2S RwD Memory Opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwr,			CXL_PMU_GID_M2S_RWD, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwrptl,		CXL_PMU_GID_M2S_RWD, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_biconflict,		CXL_PMU_GID_M2S_RWD, BIT(4)),
+	/* CXL rev 3.0 Table 3-38 M2S BIRsp Memory Opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_i,			CXL_PMU_GID_M2S_BIRSP, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_s,			CXL_PMU_GID_M2S_BIRSP, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_e,			CXL_PMU_GID_M2S_BIRSP, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_iblk,			CXL_PMU_GID_M2S_BIRSP, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_sblk,			CXL_PMU_GID_M2S_BIRSP, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_eblk,			CXL_PMU_GID_M2S_BIRSP, BIT(6)),
+	/* CXL rev 3.0 Table 3-40 S2M BISnp Opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_cur,			CXL_PMU_GID_S2M_BISNP, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_data,			CXL_PMU_GID_S2M_BISNP, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_inv,			CXL_PMU_GID_S2M_BISNP, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk,		CXL_PMU_GID_S2M_BISNP, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk,		CXL_PMU_GID_S2M_BISNP, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk,		CXL_PMU_GID_S2M_BISNP, BIT(6)),
+	/* CXL rev 3.0 Table 3-43 S2M NDR Opcopdes */
+	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp,			CXL_PMU_GID_S2M_NDR, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps,			CXL_PMU_GID_S2M_NDR, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe,			CXL_PMU_GID_S2M_NDR, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack,		CXL_PMU_GID_S2M_NDR, BIT(3)),
+	/* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
+	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata,			CXL_PMU_GID_S2M_DRS, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm,		CXL_PMU_GID_S2M_DRS, BIT(1)),
+	/* CXL rev 3.0 Table 13-5 directly lists these */
+	CXL_PMU_EVENT_CXL_ATTR(ddr_act,				CXL_PMU_GID_DDR, BIT(0)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_pre,				CXL_PMU_GID_DDR, BIT(1)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_casrd,			CXL_PMU_GID_DDR, BIT(2)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_caswr,			CXL_PMU_GID_DDR, BIT(3)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_refresh,			CXL_PMU_GID_DDR, BIT(4)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_selfrefreshent,		CXL_PMU_GID_DDR, BIT(5)),
+	CXL_PMU_EVENT_CXL_ATTR(ddr_rfm,				CXL_PMU_GID_DDR, BIT(6)),
+	NULL
+};
+
+static struct cxl_pmu_ev_cap *cxl_pmu_find_fixed_counter_ev_cap(struct cxl_pmu_info *info,
+								int vid, int gid, int msk)
+{
+	struct cxl_pmu_ev_cap *pmu_ev;
+
+	list_for_each_entry(pmu_ev, &info->event_caps_fixed, node) {
+		if (vid != pmu_ev->vid || gid != pmu_ev->gid)
+			continue;
+
+		/* Precise match for fixed counter */
+		if (msk == pmu_ev->msk)
+			return pmu_ev;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static struct cxl_pmu_ev_cap *cxl_pmu_find_config_counter_ev_cap(struct cxl_pmu_info *info,
+								 int vid, int gid, int msk)
+{
+	struct cxl_pmu_ev_cap *pmu_ev;
+
+	list_for_each_entry(pmu_ev, &info->event_caps_configurable, node) {
+		if (vid != pmu_ev->vid || gid != pmu_ev->gid)
+			continue;
+
+		/* Request mask must be subset of supported */
+		if (msk & ~pmu_ev->msk)
+			continue;
+
+		return pmu_ev;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static umode_t cxl_pmu_event_is_visible(struct kobject *kobj, struct attribute *attr, int a)
+{
+	struct device_attribute *dev_attr = container_of(attr, struct device_attribute, attr);
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(dev_attr, struct perf_pmu_events_attr, attr);
+	struct device *dev = kobj_to_dev(kobj);
+	struct cxl_pmu_info *info = dev_get_drvdata(dev);
+	int vid = FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, pmu_attr->id);
+	int gid = FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, pmu_attr->id);
+	int msk = FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, pmu_attr->id);
+
+	if (!IS_ERR(cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, msk)))
+		return attr->mode;
+
+	if (!IS_ERR(cxl_pmu_find_config_counter_ev_cap(info, vid, gid, msk)))
+		return attr->mode;
+
+	return 0;
+}
+
+static const struct attribute_group cxl_pmu_events = {
+	.name = "events",
+	.attrs = cxl_pmu_event_attrs,
+	.is_visible = cxl_pmu_event_is_visible,
+};
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct cxl_pmu_info *info = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(info->on_cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *cxl_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group cxl_pmu_cpumask_group = {
+	.attrs = cxl_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *cxl_pmu_attr_groups[] = {
+	&cxl_pmu_events,
+	&cxl_pmu_format_group,
+	&cxl_pmu_cpumask_group,
+	NULL
+};
+
+/* If counter_idx == NULL, don't try to allocate a counter. */
+static int cxl_pmu_get_event_idx(struct perf_event *event, int *counter_idx,
+				 int *event_idx)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	DECLARE_BITMAP(configurable_and_free, CXL_PMU_MAX_COUNTERS);
+	struct cxl_pmu_ev_cap *pmu_ev;
+	u32 mask;
+	u16 gid, vid;
+	int i;
+
+	vid = cxl_pmu_config_get_vid(event);
+	gid = cxl_pmu_config_get_gid(event);
+	mask = cxl_pmu_config_get_mask(event);
+
+	pmu_ev = cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, mask);
+	if (!IS_ERR(pmu_ev)) {
+		if (!counter_idx)
+			return 0;
+		if (!test_bit(pmu_ev->counter_idx, info->used_counter_bm)) {
+			*counter_idx = pmu_ev->counter_idx;
+			return 0;
+		}
+		/* Fixed counter is in use, but maybe a configurable one? */
+	}
+
+	pmu_ev = cxl_pmu_find_config_counter_ev_cap(info, vid, gid, mask);
+	if (!IS_ERR(pmu_ev)) {
+		if (!counter_idx)
+			return 0;
+
+		bitmap_andnot(configurable_and_free, info->conf_counter_bm,
+			info->used_counter_bm, CXL_PMU_MAX_COUNTERS);
+
+		i = find_first_bit(configurable_and_free, CXL_PMU_MAX_COUNTERS);
+		if (i == CXL_PMU_MAX_COUNTERS)
+			return -EINVAL;
+
+		*counter_idx = i;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int cxl_pmu_event_init(struct perf_event *event)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	int rc;
+
+	/* Top level type sanity check - is this a Hardware Event being requested */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+	/* TODO: Validation of any filter */
+
+	/*
+	 * Verify that it is possible to count what was requested. Either must
+	 * be a fixed counter that is a precise match or a configurable counter
+	 * where this is a subset.
+	 */
+	rc = cxl_pmu_get_event_idx(event, NULL, NULL);
+	if (rc < 0)
+		return rc;
+
+	event->cpu = info->on_cpu;
+
+	return 0;
+}
+
+static void cxl_pmu_enable(struct pmu *pmu)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
+	void __iomem *base = info->base;
+
+	/* Can assume frozen at this stage */
+	writeq(0, base + CXL_PMU_FREEZE_REG);
+}
+
+static void cxl_pmu_disable(struct pmu *pmu)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
+	void __iomem *base = info->base;
+
+	/*
+	 * Whilst bits above number of counters are RsvdZ
+	 * they are unlikely to be repurposed given
+	 * number of counters is allowed to be 64 leaving
+	 * no reserved bits.  Hence this is only slightly
+	 * naughty.
+	 */
+	writeq(GENMASK_ULL(63, 0), base + CXL_PMU_FREEZE_REG);
+}
+
+static void cxl_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	void __iomem *base = info->base;
+	u64 cfg;
+
+	/*
+	 * All paths to here should either set these flags directly or
+	 * call cxl_pmu_event_stop() which will ensure the correct state.
+	 */
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	/*
+	 * Currently only hdm filter control is implemnted, this code will
+	 * want generalizing when more filters are added.
+	 */
+	if (info->filter_hdm) {
+		if (cxl_pmu_config1_hdm_filter_en(event))
+			cfg = cxl_pmu_config2_get_hdm_decoder(event);
+		else
+			cfg = GENMASK(15, 0); /* No filtering if 0xFFFF_FFFF */
+		writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0));
+	}
+
+	cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1);
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW, 1);
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1);
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EDGE,
+			  cxl_pmu_config1_get_edge(event) ? 1 : 0);
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INVERT,
+			  cxl_pmu_config1_get_invert(event) ? 1 : 0);
+
+	/* Fixed purpose counters have next two fields RO */
+	if (test_bit(hwc->idx, info->conf_counter_bm)) {
+		cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK,
+				  hwc->event_base);
+		cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENTS_MSK,
+				  cxl_pmu_config_get_mask(event));
+	}
+	cfg &= ~CXL_PMU_COUNTER_CFG_THRESHOLD_MSK;
+	/*
+	 * For events that generate only 1 count per clock the CXL 3.0 spec
+	 * states the threshold shall be set to 1 but if set to 0 it will
+	 * count the raw value anwyay?
+	 * There is no definition of what events will count multiple per cycle
+	 * and hence to which non 1 values of threshold can apply.
+	 * (CXL 3.0 8.2.7.2.1 Counter Configuration - threshold field definition)
+	 */
+	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_THRESHOLD_MSK,
+			  cxl_pmu_config1_get_threshold(event));
+	writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+
+	local64_set(&hwc->prev_count, 0);
+	writeq(0, base + CXL_PMU_COUNTER_REG(hwc->idx));
+
+	perf_event_update_userpage(event);
+}
+
+static u64 cxl_pmu_read_counter(struct perf_event *event)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	void __iomem *base = info->base;
+
+	return readq(base + CXL_PMU_COUNTER_REG(event->hw.idx));
+}
+
+static void __cxl_pmu_read(struct perf_event *event, bool overflow)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 new_cnt, prev_cnt, delta;
+
+	do {
+		prev_cnt = local64_read(&hwc->prev_count);
+		new_cnt = cxl_pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != prev_cnt);
+
+	/*
+	 * If we know an overflow occur then take that into account.
+	 * Note counter is not reset as that would lose events
+	 */
+	delta = (new_cnt - prev_cnt) & GENMASK_ULL(info->counter_width - 1, 0);
+	if (overflow && delta < GENMASK_ULL(info->counter_width - 1, 0))
+		delta += (1UL << info->counter_width);
+
+	local64_add(delta, &event->count);
+}
+
+static void cxl_pmu_read(struct perf_event *event)
+{
+	__cxl_pmu_read(event, false);
+}
+
+static void cxl_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	void __iomem *base = info->base;
+	struct hw_perf_event *hwc = &event->hw;
+	u64 cfg;
+
+	cxl_pmu_read(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+	cfg &= ~(FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1) |
+		 FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1));
+	writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
+
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int cxl_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx, rc;
+	int event_idx = 0;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	rc = cxl_pmu_get_event_idx(event, &idx, &event_idx);
+	if (rc < 0)
+		return rc;
+
+	hwc->idx = idx;
+
+	/* Only set for configurable counters */
+	hwc->event_base = event_idx;
+	info->hw_events[idx] = event;
+	set_bit(idx, info->used_counter_bm);
+
+	if (flags & PERF_EF_START)
+		cxl_pmu_event_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void cxl_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	cxl_pmu_event_stop(event, PERF_EF_UPDATE);
+	clear_bit(hwc->idx, info->used_counter_bm);
+	info->hw_events[hwc->idx] = NULL;
+	perf_event_update_userpage(event);
+}
+
+static irqreturn_t cxl_pmu_irq(int irq, void *data)
+{
+	struct cxl_pmu_info *info = data;
+	void __iomem *base = info->base;
+	u64 overflowed;
+	DECLARE_BITMAP(overflowedbm, 64);
+	int i;
+
+	overflowed = readq(base + CXL_PMU_OVERFLOW_REG);
+
+	/* Interrupt may be shared, so maybe it isn't ours */
+	if (!overflowed)
+		return IRQ_NONE;
+
+	bitmap_from_arr64(overflowedbm, &overflowed, 64);
+	for_each_set_bit(i, overflowedbm, info->num_counters) {
+		struct perf_event *event = info->hw_events[i];
+
+		if (!event) {
+			dev_dbg(info->pmu.dev,
+				"overflow but on non enabled counter %d\n", i);
+			continue;
+		}
+
+		__cxl_pmu_read(event, true);
+	}
+
+	writeq(overflowed, base + CXL_PMU_OVERFLOW_REG);
+
+	return IRQ_HANDLED;
+}
+
+static void cxl_pmu_perf_unregister(void *_info)
+{
+	struct cxl_pmu_info *info = _info;
+
+	perf_pmu_unregister(&info->pmu);
+}
+
+static void cxl_pmu_cpuhp_remove(void *_info)
+{
+	struct cxl_pmu_info *info = _info;
+
+	cpuhp_state_remove_instance_nocalls(cxl_pmu_cpuhp_state_num, &info->node);
+}
+
+static int cxl_pmu_probe(struct device *dev)
+{
+	struct cxl_pmu *pmu = to_cxl_pmu(dev);
+	struct pci_dev *pdev = to_pci_dev(dev->parent);
+	struct cxl_pmu_info *info;
+	char *irq_name;
+	char *dev_name;
+	int rc, irq;
+
+	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, info);
+	INIT_LIST_HEAD(&info->event_caps_fixed);
+	INIT_LIST_HEAD(&info->event_caps_configurable);
+
+	info->base = pmu->base;
+
+	info->on_cpu = -1;
+	rc = cxl_pmu_parse_caps(dev, info);
+	if (rc)
+		return rc;
+
+	info->hw_events = devm_kcalloc(dev, sizeof(*info->hw_events),
+				       info->num_counters, GFP_KERNEL);
+	if (!info->hw_events)
+		return -ENOMEM;
+
+	switch (pmu->type) {
+	case CXL_PMU_MEMDEV:
+		dev_name = devm_kasprintf(dev, GFP_KERNEL, "cxl_pmu_mem%d.%d",
+					  pmu->assoc_id, pmu->index);
+		break;
+	}
+	if (!dev_name)
+		return -ENOMEM;
+
+	info->pmu = (struct pmu) {
+		.name = dev_name,
+		.parent = dev,
+		.module = THIS_MODULE,
+		.event_init = cxl_pmu_event_init,
+		.pmu_enable = cxl_pmu_enable,
+		.pmu_disable = cxl_pmu_disable,
+		.add = cxl_pmu_event_add,
+		.del = cxl_pmu_event_del,
+		.start = cxl_pmu_event_start,
+		.stop = cxl_pmu_event_stop,
+		.read = cxl_pmu_read,
+		.task_ctx_nr = perf_invalid_context,
+		.attr_groups = cxl_pmu_attr_groups,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	if (info->irq <= 0)
+		return -EINVAL;
+
+	rc = pci_irq_vector(pdev, info->irq);
+	if (rc < 0)
+		return rc;
+	irq = rc;
+
+	irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow\n", dev_name);
+	if (!irq_name)
+		return -ENOMEM;
+
+	rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_ONESHOT,
+			      irq_name, info);
+	if (rc)
+		return rc;
+	info->irq = irq;
+
+	rc = cpuhp_state_add_instance(cxl_pmu_cpuhp_state_num, &info->node);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action_or_reset(dev, cxl_pmu_cpuhp_remove, info);
+	if (rc)
+		return rc;
+
+	rc = perf_pmu_register(&info->pmu, info->pmu.name, -1);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action_or_reset(dev, cxl_pmu_perf_unregister, info);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+static struct cxl_driver cxl_pmu_driver = {
+	.name = "cxl_pmu",
+	.probe = cxl_pmu_probe,
+	.id = CXL_DEVICE_PMU,
+};
+
+static int cxl_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
+
+	if (info->on_cpu != -1)
+		return 0;
+
+	info->on_cpu = cpu;
+	/*
+	 * CPU HP lock is held so we should be guaranteed that the CPU hasn't yet
+	 * gone away again.
+	 */
+	WARN_ON(irq_set_affinity(info->irq, cpumask_of(cpu)));
+
+	return 0;
+}
+
+static int cxl_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
+	unsigned int target;
+
+	if (info->on_cpu != cpu)
+		return 0;
+
+	info->on_cpu = -1;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids) {
+		dev_err(info->pmu.dev, "Unable to find a suitable CPU\n");
+		return 0;
+	}
+
+	perf_pmu_migrate_context(&info->pmu, cpu, target);
+	info->on_cpu = target;
+	/*
+	 * CPU HP lock is held so we should be guaranteed that this CPU hasn't yet
+	 * gone away.
+	 */
+	WARN_ON(irq_set_affinity(info->irq, cpumask_of(target)));
+
+	return 0;
+}
+
+static __init int cxl_pmu_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				     "AP_PERF_CXL_PMU_ONLINE",
+				     cxl_pmu_online_cpu, cxl_pmu_offline_cpu);
+	if (rc < 0)
+		return rc;
+	cxl_pmu_cpuhp_state_num = rc;
+
+	rc = cxl_driver_register(&cxl_pmu_driver);
+	if (rc)
+		cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
+
+	return rc;
+}
+
+static __exit void cxl_pmu_exit(void)
+{
+	cxl_driver_unregister(&cxl_pmu_driver);
+	cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(CXL);
+module_init(cxl_pmu_init);
+module_exit(cxl_pmu_exit);
+MODULE_ALIAS_CXL(CXL_DEVICE_PMU);
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
new file mode 100644
index 0000000000..92611c9812
--- /dev/null
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -0,0 +1,808 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2017 NXP
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define COUNTER_CNTL		0x0
+#define COUNTER_READ		0x20
+
+#define COUNTER_DPCR1		0x30
+
+#define CNTL_OVER		0x1
+#define CNTL_CLEAR		0x2
+#define CNTL_EN			0x4
+#define CNTL_EN_MASK		0xFFFFFFFB
+#define CNTL_CLEAR_MASK		0xFFFFFFFD
+#define CNTL_OVER_MASK		0xFFFFFFFE
+
+#define CNTL_CP_SHIFT		16
+#define CNTL_CP_MASK		(0xFF << CNTL_CP_SHIFT)
+#define CNTL_CSV_SHIFT		24
+#define CNTL_CSV_MASK		(0xFFU << CNTL_CSV_SHIFT)
+
+#define EVENT_CYCLES_ID		0
+#define EVENT_CYCLES_COUNTER	0
+#define NUM_COUNTERS		4
+
+/* For removing bias if cycle counter CNTL.CP is set to 0xf0 */
+#define CYCLES_COUNTER_MASK	0x0FFFFFFF
+#define AXI_MASKING_REVERT	0xffff0000	/* AXI_MASKING(MSB 16bits) + AXI_ID(LSB 16bits) */
+
+#define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
+
+#define DDR_PERF_DEV_NAME	"imx8_ddr"
+#define DDR_CPUHP_CB_NAME	DDR_PERF_DEV_NAME "_perf_pmu"
+
+static DEFINE_IDA(ddr_ida);
+
+/* DDR Perf hardware feature */
+#define DDR_CAP_AXI_ID_FILTER			0x1     /* support AXI ID filter */
+#define DDR_CAP_AXI_ID_FILTER_ENHANCED		0x3     /* support enhanced AXI ID filter */
+
+struct fsl_ddr_devtype_data {
+	unsigned int quirks;    /* quirks needed for different DDR Perf core */
+	const char *identifier;	/* system PMU identifier for userspace */
+};
+
+static const struct fsl_ddr_devtype_data imx8_devtype_data;
+
+static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_FILTER,
+};
+
+static const struct fsl_ddr_devtype_data imx8mq_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_FILTER,
+	.identifier = "i.MX8MQ",
+};
+
+static const struct fsl_ddr_devtype_data imx8mm_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_FILTER,
+	.identifier = "i.MX8MM",
+};
+
+static const struct fsl_ddr_devtype_data imx8mn_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_FILTER,
+	.identifier = "i.MX8MN",
+};
+
+static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED,
+	.identifier = "i.MX8MP",
+};
+
+static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
+	{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
+	{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
+	{ .compatible = "fsl,imx8mq-ddr-pmu", .data = &imx8mq_devtype_data},
+	{ .compatible = "fsl,imx8mm-ddr-pmu", .data = &imx8mm_devtype_data},
+	{ .compatible = "fsl,imx8mn-ddr-pmu", .data = &imx8mn_devtype_data},
+	{ .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
+
+struct ddr_pmu {
+	struct pmu pmu;
+	void __iomem *base;
+	unsigned int cpu;
+	struct	hlist_node node;
+	struct	device *dev;
+	struct perf_event *events[NUM_COUNTERS];
+	enum cpuhp_state cpuhp_state;
+	const struct fsl_ddr_devtype_data *devtype_data;
+	int irq;
+	int id;
+	int active_counter;
+};
+
+static ssize_t ddr_perf_identifier_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier);
+}
+
+static umode_t ddr_perf_identifier_attr_visible(struct kobject *kobj,
+						struct attribute *attr,
+						int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	if (!pmu->devtype_data->identifier)
+		return 0;
+	return attr->mode;
+};
+
+static struct device_attribute ddr_perf_identifier_attr =
+	__ATTR(identifier, 0444, ddr_perf_identifier_show, NULL);
+
+static struct attribute *ddr_perf_identifier_attrs[] = {
+	&ddr_perf_identifier_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_identifier_attr_group = {
+	.attrs = ddr_perf_identifier_attrs,
+	.is_visible = ddr_perf_identifier_attr_visible,
+};
+
+enum ddr_perf_filter_capabilities {
+	PERF_CAP_AXI_ID_FILTER = 0,
+	PERF_CAP_AXI_ID_FILTER_ENHANCED,
+	PERF_CAP_AXI_ID_FEAT_MAX,
+};
+
+static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap)
+{
+	u32 quirks = pmu->devtype_data->quirks;
+
+	switch (cap) {
+	case PERF_CAP_AXI_ID_FILTER:
+		return !!(quirks & DDR_CAP_AXI_ID_FILTER);
+	case PERF_CAP_AXI_ID_FILTER_ENHANCED:
+		quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED;
+		return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED;
+	default:
+		WARN(1, "unknown filter cap %d\n", cap);
+	}
+
+	return 0;
+}
+
+static ssize_t ddr_perf_filter_cap_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+	struct dev_ext_attribute *ea =
+		container_of(attr, struct dev_ext_attribute, attr);
+	int cap = (long)ea->var;
+
+	return sysfs_emit(buf, "%u\n", ddr_perf_filter_cap_get(pmu, cap));
+}
+
+#define PERF_EXT_ATTR_ENTRY(_name, _func, _var)				\
+	(&((struct dev_ext_attribute) {					\
+		__ATTR(_name, 0444, _func, NULL), (void *)_var		\
+	}).attr.attr)
+
+#define PERF_FILTER_EXT_ATTR_ENTRY(_name, _var)				\
+	PERF_EXT_ATTR_ENTRY(_name, ddr_perf_filter_cap_show, _var)
+
+static struct attribute *ddr_perf_filter_cap_attr[] = {
+	PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER),
+	PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED),
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_filter_cap_attr_group = {
+	.name = "caps",
+	.attrs = ddr_perf_filter_cap_attr,
+};
+
+static ssize_t ddr_perf_cpumask_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute ddr_perf_cpumask_attr =
+	__ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL);
+
+static struct attribute *ddr_perf_cpumask_attrs[] = {
+	&ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_cpumask_attr_group = {
+	.attrs = ddr_perf_cpumask_attrs,
+};
+
+static ssize_t
+ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
+		   char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id)		\
+	PMU_EVENT_ATTR_ID(_name, ddr_pmu_event_show, _id)
+
+static struct attribute *ddr_perf_events_attrs[] = {
+	IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
+	IMX8_DDR_PMU_EVENT_ATTR(selfresh, 0x01),
+	IMX8_DDR_PMU_EVENT_ATTR(read-accesses, 0x04),
+	IMX8_DDR_PMU_EVENT_ATTR(write-accesses, 0x05),
+	IMX8_DDR_PMU_EVENT_ATTR(read-queue-depth, 0x08),
+	IMX8_DDR_PMU_EVENT_ATTR(write-queue-depth, 0x09),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-read-credit-cnt, 0x10),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-read-credit-cnt, 0x11),
+	IMX8_DDR_PMU_EVENT_ATTR(write-credit-cnt, 0x12),
+	IMX8_DDR_PMU_EVENT_ATTR(read-command, 0x20),
+	IMX8_DDR_PMU_EVENT_ATTR(write-command, 0x21),
+	IMX8_DDR_PMU_EVENT_ATTR(read-modify-write-command, 0x22),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-read, 0x23),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-req-nocredit, 0x24),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-xact-credit, 0x25),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-req-nocredit, 0x26),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-xact-credit, 0x27),
+	IMX8_DDR_PMU_EVENT_ATTR(wr-xact-credit, 0x29),
+	IMX8_DDR_PMU_EVENT_ATTR(read-cycles, 0x2a),
+	IMX8_DDR_PMU_EVENT_ATTR(write-cycles, 0x2b),
+	IMX8_DDR_PMU_EVENT_ATTR(read-write-transition, 0x30),
+	IMX8_DDR_PMU_EVENT_ATTR(precharge, 0x31),
+	IMX8_DDR_PMU_EVENT_ATTR(activate, 0x32),
+	IMX8_DDR_PMU_EVENT_ATTR(load-mode, 0x33),
+	IMX8_DDR_PMU_EVENT_ATTR(perf-mwr, 0x34),
+	IMX8_DDR_PMU_EVENT_ATTR(read, 0x35),
+	IMX8_DDR_PMU_EVENT_ATTR(read-activate, 0x36),
+	IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
+	IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
+	IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
+	IMX8_DDR_PMU_EVENT_ATTR(axid-read, 0x41),
+	IMX8_DDR_PMU_EVENT_ATTR(axid-write, 0x42),
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(axi_id, "config1:0-15");
+PMU_FORMAT_ATTR(axi_mask, "config1:16-31");
+
+static struct attribute *ddr_perf_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_axi_id.attr,
+	&format_attr_axi_mask.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_format_attr_group = {
+	.name = "format",
+	.attrs = ddr_perf_format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&ddr_perf_events_attr_group,
+	&ddr_perf_format_attr_group,
+	&ddr_perf_cpumask_attr_group,
+	&ddr_perf_filter_cap_attr_group,
+	&ddr_perf_identifier_attr_group,
+	NULL,
+};
+
+static bool ddr_perf_is_filtered(struct perf_event *event)
+{
+	return event->attr.config == 0x41 || event->attr.config == 0x42;
+}
+
+static u32 ddr_perf_filter_val(struct perf_event *event)
+{
+	return event->attr.config1;
+}
+
+static bool ddr_perf_filters_compatible(struct perf_event *a,
+					struct perf_event *b)
+{
+	if (!ddr_perf_is_filtered(a))
+		return true;
+	if (!ddr_perf_is_filtered(b))
+		return true;
+	return ddr_perf_filter_val(a) == ddr_perf_filter_val(b);
+}
+
+static bool ddr_perf_is_enhanced_filtered(struct perf_event *event)
+{
+	unsigned int filt;
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+
+	filt = pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED;
+	return (filt == DDR_CAP_AXI_ID_FILTER_ENHANCED) &&
+		ddr_perf_is_filtered(event);
+}
+
+static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event)
+{
+	int i;
+
+	/*
+	 * Always map cycle event to counter 0
+	 * Cycles counter is dedicated for cycle event
+	 * can't used for the other events
+	 */
+	if (event == EVENT_CYCLES_ID) {
+		if (pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+			return EVENT_CYCLES_COUNTER;
+		else
+			return -ENOENT;
+	}
+
+	for (i = 1; i < NUM_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter)
+{
+	pmu->events[counter] = NULL;
+}
+
+static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
+{
+	struct perf_event *event = pmu->events[counter];
+	void __iomem *base = pmu->base;
+
+	/*
+	 * return bytes instead of bursts from ddr transaction for
+	 * axid-read and axid-write event if PMU core supports enhanced
+	 * filter.
+	 */
+	base += ddr_perf_is_enhanced_filtered(event) ? COUNTER_DPCR1 :
+						       COUNTER_READ;
+	return readl_relaxed(base + counter * 4);
+}
+
+static int ddr_perf_event_init(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event *sibling;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (event->cpu < 0) {
+		dev_warn(pmu->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * We must NOT create groups containing mixed PMUs, although software
+	 * events are acceptable (for example to create a CCN group
+	 * periodically read when a hrtimer aka cpu-clock leader triggers).
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
+		if (!ddr_perf_filters_compatible(event, event->group_leader))
+			return -EINVAL;
+		for_each_sibling_event(sibling, event->group_leader) {
+			if (!ddr_perf_filters_compatible(event, sibling))
+				return -EINVAL;
+		}
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
+	}
+
+	event->cpu = pmu->cpu;
+	hwc->idx = -1;
+
+	return 0;
+}
+
+static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
+				  int counter, bool enable)
+{
+	u8 reg = counter * 4 + COUNTER_CNTL;
+	int val;
+
+	if (enable) {
+		/*
+		 * cycle counter is special which should firstly write 0 then
+		 * write 1 into CLEAR bit to clear it. Other counters only
+		 * need write 0 into CLEAR bit and it turns out to be 1 by
+		 * hardware. Below enable flow is harmless for all counters.
+		 */
+		writel(0, pmu->base + reg);
+		val = CNTL_EN | CNTL_CLEAR;
+		val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+		/*
+		 * On i.MX8MP we need to bias the cycle counter to overflow more often.
+		 * We do this by initializing bits [23:16] of the counter value via the
+		 * COUNTER_CTRL Counter Parameter (CP) field.
+		 */
+		if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+			if (counter == EVENT_CYCLES_COUNTER)
+				val |= FIELD_PREP(CNTL_CP_MASK, 0xf0);
+		}
+
+		writel(val, pmu->base + reg);
+	} else {
+		/* Disable counter */
+		val = readl_relaxed(pmu->base + reg) & CNTL_EN_MASK;
+		writel(val, pmu->base + reg);
+	}
+}
+
+static bool ddr_perf_counter_overflow(struct ddr_pmu *pmu, int counter)
+{
+	int val;
+
+	val = readl_relaxed(pmu->base + counter * 4 + COUNTER_CNTL);
+
+	return val & CNTL_OVER;
+}
+
+static void ddr_perf_counter_clear(struct ddr_pmu *pmu, int counter)
+{
+	u8 reg = counter * 4 + COUNTER_CNTL;
+	int val;
+
+	val = readl_relaxed(pmu->base + reg);
+	val &= ~CNTL_CLEAR;
+	writel(val, pmu->base + reg);
+
+	val |= CNTL_CLEAR;
+	writel(val, pmu->base + reg);
+}
+
+static void ddr_perf_event_update(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 new_raw_count;
+	int counter = hwc->idx;
+	int ret;
+
+	new_raw_count = ddr_perf_read_counter(pmu, counter);
+	/* Remove the bias applied in ddr_perf_counter_enable(). */
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+		if (counter == EVENT_CYCLES_COUNTER)
+			new_raw_count &= CYCLES_COUNTER_MASK;
+	}
+
+	local64_add(new_raw_count, &event->count);
+
+	/*
+	 * For legacy SoCs: event counter continue counting when overflow,
+	 *                  no need to clear the counter.
+	 * For new SoCs: event counter stop counting when overflow, need
+	 *               clear counter to let it count again.
+	 */
+	if (counter != EVENT_CYCLES_COUNTER) {
+		ret = ddr_perf_counter_overflow(pmu, counter);
+		if (ret)
+			dev_warn_ratelimited(pmu->dev,  "events lost due to counter overflow (config 0x%llx)\n",
+					     event->attr.config);
+	}
+
+	/* clear counter every time for both cycle counter and event counter */
+	ddr_perf_counter_clear(pmu, counter);
+}
+
+static void ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	local64_set(&hwc->prev_count, 0);
+
+	ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
+
+	if (!pmu->active_counter++)
+		ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+			EVENT_CYCLES_COUNTER, true);
+
+	hwc->state = 0;
+}
+
+static int ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter;
+	int cfg = event->attr.config;
+	int cfg1 = event->attr.config1;
+
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
+		int i;
+
+		for (i = 1; i < NUM_COUNTERS; i++) {
+			if (pmu->events[i] &&
+			    !ddr_perf_filters_compatible(event, pmu->events[i]))
+				return -EINVAL;
+		}
+
+		if (ddr_perf_is_filtered(event)) {
+			/* revert axi id masking(axi_mask) value */
+			cfg1 ^= AXI_MASKING_REVERT;
+			writel(cfg1, pmu->base + COUNTER_DPCR1);
+		}
+	}
+
+	counter = ddr_perf_alloc_counter(pmu, cfg);
+	if (counter < 0) {
+		dev_dbg(pmu->dev, "There are not enough counters\n");
+		return -EOPNOTSUPP;
+	}
+
+	pmu->events[counter] = event;
+	hwc->idx = counter;
+
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		ddr_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
+	ddr_perf_event_update(event);
+
+	if (!--pmu->active_counter)
+		ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+			EVENT_CYCLES_COUNTER, false);
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+static void ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+	ddr_perf_free_counter(pmu, counter);
+	hwc->idx = -1;
+}
+
+static void ddr_perf_pmu_enable(struct pmu *pmu)
+{
+}
+
+static void ddr_perf_pmu_disable(struct pmu *pmu)
+{
+}
+
+static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+			 struct device *dev)
+{
+	*pmu = (struct ddr_pmu) {
+		.pmu = (struct pmu) {
+			.module	      = THIS_MODULE,
+			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr = perf_invalid_context,
+			.attr_groups = attr_groups,
+			.event_init  = ddr_perf_event_init,
+			.add	     = ddr_perf_event_add,
+			.del	     = ddr_perf_event_del,
+			.start	     = ddr_perf_event_start,
+			.stop	     = ddr_perf_event_stop,
+			.read	     = ddr_perf_event_update,
+			.pmu_enable  = ddr_perf_pmu_enable,
+			.pmu_disable = ddr_perf_pmu_disable,
+		},
+		.base = base,
+		.dev = dev,
+	};
+
+	pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL);
+	return pmu->id;
+}
+
+static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
+{
+	int i;
+	struct ddr_pmu *pmu = (struct ddr_pmu *) p;
+	struct perf_event *event;
+
+	/* all counter will stop if cycle counter disabled */
+	ddr_perf_counter_enable(pmu,
+			      EVENT_CYCLES_ID,
+			      EVENT_CYCLES_COUNTER,
+			      false);
+	/*
+	 * When the cycle counter overflows, all counters are stopped,
+	 * and an IRQ is raised. If any other counter overflows, it
+	 * continues counting, and no IRQ is raised. But for new SoCs,
+	 * such as i.MX8MP, event counter would stop when overflow, so
+	 * we need use cycle counter to stop overflow of event counter.
+	 *
+	 * Cycles occur at least 4 times as often as other events, so we
+	 * can update all events on a cycle counter overflow and not
+	 * lose events.
+	 *
+	 */
+	for (i = 0; i < NUM_COUNTERS; i++) {
+
+		if (!pmu->events[i])
+			continue;
+
+		event = pmu->events[i];
+
+		ddr_perf_event_update(event);
+	}
+
+	ddr_perf_counter_enable(pmu,
+			      EVENT_CYCLES_ID,
+			      EVENT_CYCLES_COUNTER,
+			      true);
+
+	return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+	int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+
+	WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
+
+	return 0;
+}
+
+static int ddr_perf_probe(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu;
+	struct device_node *np;
+	void __iomem *base;
+	char *name;
+	int num;
+	int ret;
+	int irq;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	np = pdev->dev.of_node;
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	num = ddr_perf_init(pmu, base, &pdev->dev);
+
+	platform_set_drvdata(pdev, pmu);
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
+			      num);
+	if (!name) {
+		ret = -ENOMEM;
+		goto cpuhp_state_err;
+	}
+
+	pmu->devtype_data = of_device_get_match_data(&pdev->dev);
+
+	pmu->cpu = raw_smp_processor_id();
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      DDR_CPUHP_CB_NAME,
+				      NULL,
+				      ddr_perf_offline_cpu);
+
+	if (ret < 0) {
+		dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n");
+		goto cpuhp_state_err;
+	}
+
+	pmu->cpuhp_state = ret;
+
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		goto cpuhp_instance_err;
+	}
+
+	/* Request irq */
+	irq = of_irq_get(np, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "Failed to get irq: %d", irq);
+		ret = irq;
+		goto ddr_perf_err;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq,
+					ddr_perf_irq_handler,
+					IRQF_NOBALANCING | IRQF_NO_THREAD,
+					DDR_CPUHP_CB_NAME,
+					pmu);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Request irq failed: %d", ret);
+		goto ddr_perf_err;
+	}
+
+	pmu->irq = irq;
+	ret = irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu));
+	if (ret) {
+		dev_err(pmu->dev, "Failed to set interrupt affinity!\n");
+		goto ddr_perf_err;
+	}
+
+	ret = perf_pmu_register(&pmu->pmu, name, -1);
+	if (ret)
+		goto ddr_perf_err;
+
+	return 0;
+
+ddr_perf_err:
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+cpuhp_instance_err:
+	cpuhp_remove_multi_state(pmu->cpuhp_state);
+cpuhp_state_err:
+	ida_free(&ddr_ida, pmu->id);
+	dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
+	return ret;
+}
+
+static int ddr_perf_remove(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	cpuhp_remove_multi_state(pmu->cpuhp_state);
+
+	perf_pmu_unregister(&pmu->pmu);
+
+	ida_free(&ddr_ida, pmu->id);
+	return 0;
+}
+
+static struct platform_driver imx_ddr_pmu_driver = {
+	.driver         = {
+		.name   = "imx-ddr-pmu",
+		.of_match_table = imx_ddr_pmu_dt_ids,
+		.suppress_bind_attrs = true,
+	},
+	.probe          = ddr_perf_probe,
+	.remove         = ddr_perf_remove,
+};
+
+module_platform_driver(imx_ddr_pmu_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
new file mode 100644
index 0000000000..5cf770a1bc
--- /dev/null
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2023 NXP
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/perf_event.h>
+
+/* Performance monitor configuration */
+#define PMCFG1  			0x00
+#define PMCFG1_RD_TRANS_FILT_EN 	BIT(31)
+#define PMCFG1_WR_TRANS_FILT_EN 	BIT(30)
+#define PMCFG1_RD_BT_FILT_EN 		BIT(29)
+#define PMCFG1_ID_MASK  		GENMASK(17, 0)
+
+#define PMCFG2  			0x04
+#define PMCFG2_ID			GENMASK(17, 0)
+
+/* Global control register affects all counters and takes priority over local control registers */
+#define PMGC0		0x40
+/* Global control register bits */
+#define PMGC0_FAC	BIT(31)
+#define PMGC0_PMIE	BIT(30)
+#define PMGC0_FCECE	BIT(29)
+
+/*
+ * 64bit counter0 exclusively dedicated to counting cycles
+ * 32bit counters monitor counter-specific events in addition to counting reference events
+ */
+#define PMLCA(n)	(0x40 + 0x10 + (0x10 * n))
+#define PMLCB(n)	(0x40 + 0x14 + (0x10 * n))
+#define PMC(n)		(0x40 + 0x18 + (0x10 * n))
+/* Local control register bits */
+#define PMLCA_FC	BIT(31)
+#define PMLCA_CE	BIT(26)
+#define PMLCA_EVENT	GENMASK(22, 16)
+
+#define NUM_COUNTERS		11
+#define CYCLES_COUNTER		0
+
+#define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
+
+#define DDR_PERF_DEV_NAME	"imx9_ddr"
+#define DDR_CPUHP_CB_NAME	DDR_PERF_DEV_NAME "_perf_pmu"
+
+static DEFINE_IDA(ddr_ida);
+
+struct imx_ddr_devtype_data {
+	const char *identifier;		/* system PMU identifier for userspace */
+};
+
+struct ddr_pmu {
+	struct pmu pmu;
+	void __iomem *base;
+	unsigned int cpu;
+	struct hlist_node node;
+	struct device *dev;
+	struct perf_event *events[NUM_COUNTERS];
+	int active_events;
+	enum cpuhp_state cpuhp_state;
+	const struct imx_ddr_devtype_data *devtype_data;
+	int irq;
+	int id;
+};
+
+static const struct imx_ddr_devtype_data imx93_devtype_data = {
+	.identifier = "imx93",
+};
+
+static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
+	{.compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
+
+static ssize_t ddr_perf_identifier_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier);
+}
+
+static struct device_attribute ddr_perf_identifier_attr =
+	__ATTR(identifier, 0444, ddr_perf_identifier_show, NULL);
+
+static struct attribute *ddr_perf_identifier_attrs[] = {
+	&ddr_perf_identifier_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ddr_perf_identifier_attr_group = {
+	.attrs = ddr_perf_identifier_attrs,
+};
+
+static ssize_t ddr_perf_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute ddr_perf_cpumask_attr =
+	__ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL);
+
+static struct attribute *ddr_perf_cpumask_attrs[] = {
+	&ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_cpumask_attr_group = {
+	.attrs = ddr_perf_cpumask_attrs,
+};
+
+static ssize_t ddr_pmu_event_show(struct device *dev,
+				  struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id)				\
+	(&((struct perf_pmu_events_attr[]) {				\
+		{ .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
+		  .id = _id, }						\
+	})[0].attr.attr)
+
+static struct attribute *ddr_perf_events_attrs[] = {
+	/* counter0 cycles event */
+	IMX9_DDR_PMU_EVENT_ATTR(cycles, 0),
+
+	/* reference events for all normal counters, need assert DEBUG19[21] bit */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ddrc1_rmw_for_ecc, 12),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_rreorder, 13),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_wreorder, 14),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_0, 15),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_1, 16),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_2, 17),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_3, 18),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_4, 19),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_5, 22),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_6, 23),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_7, 24),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_8, 25),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_9, 26),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_10, 27),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_11, 28),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_12, 31),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_13, 59),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_15, 61),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63),
+
+	/* counter1 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, 71),
+
+	/* counter2 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, 71),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, 72),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, 73),
+
+	/* counter3 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, 71),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, 72),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, 73),
+
+	/* counter4 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, 71),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, 72),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, 73),
+
+	/* counter5 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, 66),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, 67),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, 68),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, 69),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, 70),
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, 71),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, 72),
+
+	/* counter6 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, 72),
+
+	/* counter7 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, 65),
+
+	/* counter8 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, 64),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, 65),
+
+	/* counter9 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, 66),
+
+	/* counter10 specific events */
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, 65),
+	IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, 66),
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(counter, "config:8-15");
+PMU_FORMAT_ATTR(axi_id, "config1:0-17");
+PMU_FORMAT_ATTR(axi_mask, "config2:0-17");
+
+static struct attribute *ddr_perf_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_counter.attr,
+	&format_attr_axi_id.attr,
+	&format_attr_axi_mask.attr,
+	NULL,
+};
+
+static const struct attribute_group ddr_perf_format_attr_group = {
+	.name = "format",
+	.attrs = ddr_perf_format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&ddr_perf_identifier_attr_group,
+	&ddr_perf_cpumask_attr_group,
+	&ddr_perf_events_attr_group,
+	&ddr_perf_format_attr_group,
+	NULL,
+};
+
+static void ddr_perf_clear_counter(struct ddr_pmu *pmu, int counter)
+{
+	if (counter == CYCLES_COUNTER) {
+		writel(0, pmu->base + PMC(counter) + 0x4);
+		writel(0, pmu->base + PMC(counter));
+	} else {
+		writel(0, pmu->base + PMC(counter));
+	}
+}
+
+static u64 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
+{
+	u32 val_lower, val_upper;
+	u64 val;
+
+	if (counter != CYCLES_COUNTER) {
+		val = readl_relaxed(pmu->base + PMC(counter));
+		goto out;
+	}
+
+	/* special handling for reading 64bit cycle counter */
+	do {
+		val_upper = readl_relaxed(pmu->base + PMC(counter) + 0x4);
+		val_lower = readl_relaxed(pmu->base + PMC(counter));
+	} while (val_upper != readl_relaxed(pmu->base + PMC(counter) + 0x4));
+
+	val = val_upper;
+	val = (val << 32);
+	val |= val_lower;
+out:
+	return val;
+}
+
+static void ddr_perf_counter_global_config(struct ddr_pmu *pmu, bool enable)
+{
+	u32 ctrl;
+
+	ctrl = readl_relaxed(pmu->base + PMGC0);
+
+	if (enable) {
+		/*
+		 * The performance monitor must be reset before event counting
+		 * sequences. The performance monitor can be reset by first freezing
+		 * one or more counters and then clearing the freeze condition to
+		 * allow the counters to count according to the settings in the
+		 * performance monitor registers. Counters can be frozen individually
+		 * by setting PMLCAn[FC] bits, or simultaneously by setting PMGC0[FAC].
+		 * Simply clearing these freeze bits will then allow the performance
+		 * monitor to begin counting based on the register settings.
+		 */
+		ctrl |= PMGC0_FAC;
+		writel(ctrl, pmu->base + PMGC0);
+
+		/*
+		 * Freeze all counters disabled, interrupt enabled, and freeze
+		 * counters on condition enabled.
+		 */
+		ctrl &= ~PMGC0_FAC;
+		ctrl |= PMGC0_PMIE | PMGC0_FCECE;
+		writel(ctrl, pmu->base + PMGC0);
+	} else {
+		ctrl |= PMGC0_FAC;
+		ctrl &= ~(PMGC0_PMIE | PMGC0_FCECE);
+		writel(ctrl, pmu->base + PMGC0);
+	}
+}
+
+static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config,
+				    int counter, bool enable)
+{
+	u32 ctrl_a;
+
+	ctrl_a = readl_relaxed(pmu->base + PMLCA(counter));
+
+	if (enable) {
+		ctrl_a |= PMLCA_FC;
+		writel(ctrl_a, pmu->base + PMLCA(counter));
+
+		ddr_perf_clear_counter(pmu, counter);
+
+		/* Freeze counter disabled, condition enabled, and program event.*/
+		ctrl_a &= ~PMLCA_FC;
+		ctrl_a |= PMLCA_CE;
+		ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F);
+		ctrl_a |= FIELD_PREP(PMLCA_EVENT, (config & 0x000000FF));
+		writel(ctrl_a, pmu->base + PMLCA(counter));
+	} else {
+		/* Freeze counter. */
+		ctrl_a |= PMLCA_FC;
+		writel(ctrl_a, pmu->base + PMLCA(counter));
+	}
+}
+
+static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int cfg2)
+{
+	u32 pmcfg1, pmcfg2;
+	int event, counter;
+
+	event = cfg & 0x000000FF;
+	counter = (cfg & 0x0000FF00) >> 8;
+
+	pmcfg1 = readl_relaxed(pmu->base + PMCFG1);
+
+	if (counter == 2 && event == 73)
+		pmcfg1 |= PMCFG1_RD_TRANS_FILT_EN;
+	else if (counter == 2 && event != 73)
+		pmcfg1 &= ~PMCFG1_RD_TRANS_FILT_EN;
+
+	if (counter == 3 && event == 73)
+		pmcfg1 |= PMCFG1_WR_TRANS_FILT_EN;
+	else if (counter == 3 && event != 73)
+		pmcfg1 &= ~PMCFG1_WR_TRANS_FILT_EN;
+
+	if (counter == 4 && event == 73)
+		pmcfg1 |= PMCFG1_RD_BT_FILT_EN;
+	else if (counter == 4 && event != 73)
+		pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN;
+
+	pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF);
+	pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, cfg2);
+	writel(pmcfg1, pmu->base + PMCFG1);
+
+	pmcfg2 = readl_relaxed(pmu->base + PMCFG2);
+	pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF);
+	pmcfg2 |= FIELD_PREP(PMCFG2_ID, cfg1);
+	writel(pmcfg2, pmu->base + PMCFG2);
+}
+
+static void ddr_perf_event_update(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+	u64 new_raw_count;
+
+	new_raw_count = ddr_perf_read_counter(pmu, counter);
+	local64_add(new_raw_count, &event->count);
+
+	/* clear counter's value every time */
+	ddr_perf_clear_counter(pmu, counter);
+}
+
+static int ddr_perf_event_init(struct perf_event *event)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event *sibling;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (event->cpu < 0) {
+		dev_warn(pmu->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * We must NOT create groups containing mixed PMUs, although software
+	 * events are acceptable (for example to create a CCN group
+	 * periodically read when a hrtimer aka cpu-clock leader triggers).
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
+	}
+
+	event->cpu = pmu->cpu;
+	hwc->idx = -1;
+
+	return 0;
+}
+
+static void ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	local64_set(&hwc->prev_count, 0);
+
+	ddr_perf_counter_local_config(pmu, event->attr.config, counter, true);
+	hwc->state = 0;
+}
+
+static int ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int cfg = event->attr.config;
+	int cfg1 = event->attr.config1;
+	int cfg2 = event->attr.config2;
+	int counter;
+
+	counter = (cfg & 0x0000FF00) >> 8;
+
+	pmu->events[counter] = event;
+	pmu->active_events++;
+	hwc->idx = counter;
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		ddr_perf_event_start(event, flags);
+
+	/* read trans, write trans, read beat */
+	ddr_perf_monitor_config(pmu, cfg, cfg1, cfg2);
+
+	return 0;
+}
+
+static void ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	ddr_perf_counter_local_config(pmu, event->attr.config, counter, false);
+	ddr_perf_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+static void ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+	pmu->active_events--;
+	hwc->idx = -1;
+}
+
+static void ddr_perf_pmu_enable(struct pmu *pmu)
+{
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+	ddr_perf_counter_global_config(ddr_pmu, true);
+}
+
+static void ddr_perf_pmu_disable(struct pmu *pmu)
+{
+	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+	ddr_perf_counter_global_config(ddr_pmu, false);
+}
+
+static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+			 struct device *dev)
+{
+	*pmu = (struct ddr_pmu) {
+		.pmu = (struct pmu) {
+			.module       = THIS_MODULE,
+			.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+			.task_ctx_nr  = perf_invalid_context,
+			.attr_groups  = attr_groups,
+			.event_init   = ddr_perf_event_init,
+			.add          = ddr_perf_event_add,
+			.del          = ddr_perf_event_del,
+			.start        = ddr_perf_event_start,
+			.stop         = ddr_perf_event_stop,
+			.read         = ddr_perf_event_update,
+			.pmu_enable   = ddr_perf_pmu_enable,
+			.pmu_disable  = ddr_perf_pmu_disable,
+		},
+		.base = base,
+		.dev = dev,
+	};
+}
+
+static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
+{
+	struct ddr_pmu *pmu = (struct ddr_pmu *)p;
+	struct perf_event *event;
+	int i;
+
+	/*
+	 * Counters can generate an interrupt on an overflow when msb of a
+	 * counter changes from 0 to 1. For the interrupt to be signalled,
+	 * below condition mush be satisfied:
+	 * PMGC0[PMIE] = 1, PMGC0[FCECE] = 1, PMLCAn[CE] = 1
+	 * When an interrupt is signalled, PMGC0[FAC] is set by hardware and
+	 * all of the registers are frozen.
+	 * Software can clear the interrupt condition by resetting the performance
+	 * monitor and clearing the most significant bit of the counter that
+	 * generate the overflow.
+	 */
+	for (i = 0; i < NUM_COUNTERS; i++) {
+		if (!pmu->events[i])
+			continue;
+
+		event = pmu->events[i];
+
+		ddr_perf_event_update(event);
+	}
+
+	ddr_perf_counter_global_config(pmu, true);
+
+	return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+	int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+
+	WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
+
+	return 0;
+}
+
+static int ddr_perf_probe(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu;
+	void __iomem *base;
+	int ret, irq;
+	char *name;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	ddr_perf_init(pmu, base, &pdev->dev);
+
+	pmu->devtype_data = of_device_get_match_data(&pdev->dev);
+
+	platform_set_drvdata(pdev, pmu);
+
+	pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id);
+	if (!name) {
+		ret = -ENOMEM;
+		goto format_string_err;
+	}
+
+	pmu->cpu = raw_smp_processor_id();
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DDR_CPUHP_CB_NAME,
+				      NULL, ddr_perf_offline_cpu);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to add callbacks for multi state\n");
+		goto cpuhp_state_err;
+	}
+	pmu->cpuhp_state = ret;
+
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		goto cpuhp_instance_err;
+	}
+
+	/* Request irq */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto ddr_perf_err;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, ddr_perf_irq_handler,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       DDR_CPUHP_CB_NAME, pmu);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Request irq failed: %d", ret);
+		goto ddr_perf_err;
+	}
+
+	pmu->irq = irq;
+	ret = irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu));
+	if (ret) {
+		dev_err(pmu->dev, "Failed to set interrupt affinity\n");
+		goto ddr_perf_err;
+	}
+
+	ret = perf_pmu_register(&pmu->pmu, name, -1);
+	if (ret)
+		goto ddr_perf_err;
+
+	return 0;
+
+ddr_perf_err:
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+cpuhp_instance_err:
+	cpuhp_remove_multi_state(pmu->cpuhp_state);
+cpuhp_state_err:
+format_string_err:
+	ida_simple_remove(&ddr_ida, pmu->id);
+	dev_warn(&pdev->dev, "i.MX9 DDR Perf PMU failed (%d), disabled\n", ret);
+	return ret;
+}
+
+static int ddr_perf_remove(struct platform_device *pdev)
+{
+	struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+	cpuhp_remove_multi_state(pmu->cpuhp_state);
+
+	perf_pmu_unregister(&pmu->pmu);
+
+	ida_simple_remove(&ddr_ida, pmu->id);
+
+	return 0;
+}
+
+static struct platform_driver imx_ddr_pmu_driver = {
+	.driver         = {
+		.name                = "imx9-ddr-pmu",
+		.of_match_table      = imx_ddr_pmu_dt_ids,
+		.suppress_bind_attrs = true,
+	},
+	.probe          = ddr_perf_probe,
+	.remove         = ddr_perf_remove,
+};
+module_platform_driver(imx_ddr_pmu_driver);
+
+MODULE_AUTHOR("Xu Yang <xu.yang_2@nxp.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DDRC PerfMon for i.MX9 SoCs");
diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig
new file mode 100644
index 0000000000..171bfc1b6b
--- /dev/null
+++ b/drivers/perf/hisilicon/Kconfig
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HISI_PMU
+	tristate "HiSilicon SoC PMU drivers"
+	depends on ARM64 && ACPI
+	  help
+	  Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home
+	  Agent performance monitor and DDR Controller performance monitor.
+
+config HISI_PCIE_PMU
+	tristate "HiSilicon PCIE PERF PMU"
+	depends on PCI && ARM64
+	help
+	  Provide support for HiSilicon PCIe performance monitoring unit (PMU)
+	  RCiEP devices.
+	  Adds the PCIe PMU into perf events system for monitoring latency,
+	  bandwidth etc.
+
+config HNS3_PMU
+	tristate "HNS3 PERF PMU"
+	depends on ARM64 || COMPILE_TEST
+	depends on PCI
+	help
+	  Provide support for HNS3 performance monitoring unit (PMU) RCiEP
+	  devices.
+	  Adds the HNS3 PMU into perf events system for monitoring latency,
+	  bandwidth etc.
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
new file mode 100644
index 0000000000..48dcc8381e
--- /dev/null
+++ b/drivers/perf/hisilicon/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
+			  hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
+			  hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o
+
+obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
+obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
new file mode 100644
index 0000000000..051efffc44
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -0,0 +1,970 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This driver adds support for PCIe PMU RCiEP device. Related
+ * perf events are bandwidth, latency etc.
+ *
+ * Copyright (C) 2021 HiSilicon Limited
+ * Author: Qi Liu <liuqi115@huawei.com>
+ */
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
+
+#define DRV_NAME "hisi_pcie_pmu"
+/* Define registers */
+#define HISI_PCIE_GLOBAL_CTRL		0x00
+#define HISI_PCIE_EVENT_CTRL		0x010
+#define HISI_PCIE_CNT			0x090
+#define HISI_PCIE_EXT_CNT		0x110
+#define HISI_PCIE_INT_STAT		0x150
+#define HISI_PCIE_INT_MASK		0x154
+#define HISI_PCIE_REG_BDF		0xfe0
+#define HISI_PCIE_REG_VERSION		0xfe4
+#define HISI_PCIE_REG_INFO		0xfe8
+
+/* Define command in HISI_PCIE_GLOBAL_CTRL */
+#define HISI_PCIE_GLOBAL_EN		0x01
+#define HISI_PCIE_GLOBAL_NONE		0
+
+/* Define command in HISI_PCIE_EVENT_CTRL */
+#define HISI_PCIE_EVENT_EN		BIT_ULL(20)
+#define HISI_PCIE_RESET_CNT		BIT_ULL(22)
+#define HISI_PCIE_INIT_SET		BIT_ULL(34)
+#define HISI_PCIE_THR_EN		BIT_ULL(26)
+#define HISI_PCIE_TARGET_EN		BIT_ULL(32)
+#define HISI_PCIE_TRIG_EN		BIT_ULL(52)
+
+/* Define offsets in HISI_PCIE_EVENT_CTRL */
+#define HISI_PCIE_EVENT_M		GENMASK_ULL(15, 0)
+#define HISI_PCIE_THR_MODE_M		GENMASK_ULL(27, 27)
+#define HISI_PCIE_THR_M			GENMASK_ULL(31, 28)
+#define HISI_PCIE_LEN_M			GENMASK_ULL(35, 34)
+#define HISI_PCIE_TARGET_M		GENMASK_ULL(52, 36)
+#define HISI_PCIE_TRIG_MODE_M		GENMASK_ULL(53, 53)
+#define HISI_PCIE_TRIG_M		GENMASK_ULL(59, 56)
+
+/* Default config of TLP length mode, will count both TLP headers and payloads */
+#define HISI_PCIE_LEN_M_DEFAULT		3ULL
+
+#define HISI_PCIE_MAX_COUNTERS		8
+#define HISI_PCIE_REG_STEP		8
+#define HISI_PCIE_THR_MAX_VAL		10
+#define HISI_PCIE_TRIG_MAX_VAL		10
+#define HISI_PCIE_MAX_PERIOD		(GENMASK_ULL(63, 0))
+#define HISI_PCIE_INIT_VAL		BIT_ULL(63)
+
+struct hisi_pcie_pmu {
+	struct perf_event *hw_events[HISI_PCIE_MAX_COUNTERS];
+	struct hlist_node node;
+	struct pci_dev *pdev;
+	struct pmu pmu;
+	void __iomem *base;
+	int irq;
+	u32 identifier;
+	/* Minimum and maximum BDF of root ports monitored by PMU */
+	u16 bdf_min;
+	u16 bdf_max;
+	int on_cpu;
+};
+
+struct hisi_pcie_reg_pair {
+	u16 lo;
+	u16 hi;
+};
+
+#define to_pcie_pmu(p)  (container_of((p), struct hisi_pcie_pmu, pmu))
+#define GET_PCI_DEVFN(bdf)  ((bdf) & 0xff)
+
+#define HISI_PCIE_PMU_FILTER_ATTR(_name, _config, _hi, _lo)		  \
+	static u64 hisi_pcie_get_##_name(struct perf_event *event)	  \
+	{								  \
+		return FIELD_GET(GENMASK(_hi, _lo), event->attr._config); \
+	}								  \
+
+HISI_PCIE_PMU_FILTER_ATTR(event, config, 16, 0);
+HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0);
+HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4);
+HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5);
+HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9);
+HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
+HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
+HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
+
+static ssize_t hisi_pcie_format_sysfs_show(struct device *dev, struct device_attribute *attr,
+					   char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t hisi_pcie_event_sysfs_show(struct device *dev, struct device_attribute *attr,
+					  char *buf)
+{
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sysfs_emit(buf, "config=0x%llx\n", pmu_attr->id);
+}
+
+#define HISI_PCIE_PMU_FORMAT_ATTR(_name, _format)                              \
+	(&((struct dev_ext_attribute[]){                                       \
+		{ .attr = __ATTR(_name, 0444, hisi_pcie_format_sysfs_show,     \
+				 NULL),                                        \
+		  .var = (void *)_format }                                     \
+	})[0].attr.attr)
+
+#define HISI_PCIE_PMU_EVENT_ATTR(_name, _id)			\
+	PMU_EVENT_ATTR_ID(_name, hisi_pcie_event_sysfs_show, _id)
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->on_cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static ssize_t identifier_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%#x\n", pcie_pmu->identifier);
+}
+static DEVICE_ATTR_RO(identifier);
+
+static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%#04x\n", PCI_BUS_NUM(pcie_pmu->bdf_min));
+}
+static DEVICE_ATTR_RO(bus);
+
+static struct hisi_pcie_reg_pair
+hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off)
+{
+	u32 val = readl_relaxed(pcie_pmu->base + reg_off);
+	struct hisi_pcie_reg_pair regs = {
+		.lo = val,
+		.hi = val >> 16,
+	};
+
+	return regs;
+}
+
+/*
+ * Hardware counter and ext_counter work together for bandwidth, latency, bus
+ * utilization and buffer occupancy events. For example, RX memory write latency
+ * events(index = 0x0010), counter counts total delay cycles and ext_counter
+ * counts RX memory write PCIe packets number.
+ *
+ * As we don't want PMU driver to process these two data, "delay cycles" can
+ * be treated as an independent event(index = 0x0010), "RX memory write packets
+ * number" as another(index = 0x10010). BIT 16 is used to distinguish and 0-15
+ * bits are "real" event index, which can be used to set HISI_PCIE_EVENT_CTRL.
+ */
+#define EXT_COUNTER_IS_USED(idx)		((idx) & BIT(16))
+
+static u32 hisi_pcie_get_real_event(struct perf_event *event)
+{
+	return hisi_pcie_get_event(event) & GENMASK(15, 0);
+}
+
+static u32 hisi_pcie_pmu_get_offset(u32 offset, u32 idx)
+{
+	return offset + HISI_PCIE_REG_STEP * idx;
+}
+
+static u32 hisi_pcie_pmu_readl(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset,
+			       u32 idx)
+{
+	u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx);
+
+	return readl_relaxed(pcie_pmu->base + offset);
+}
+
+static void hisi_pcie_pmu_writel(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx, u32 val)
+{
+	u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx);
+
+	writel_relaxed(val, pcie_pmu->base + offset);
+}
+
+static u64 hisi_pcie_pmu_readq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx)
+{
+	u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx);
+
+	return readq_relaxed(pcie_pmu->base + offset);
+}
+
+static void hisi_pcie_pmu_writeq(struct hisi_pcie_pmu *pcie_pmu, u32 reg_offset, u32 idx, u64 val)
+{
+	u32 offset = hisi_pcie_pmu_get_offset(reg_offset, idx);
+
+	writeq_relaxed(val, pcie_pmu->base + offset);
+}
+
+static void hisi_pcie_pmu_config_filter(struct perf_event *event)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 port, trig_len, thr_len, len_mode;
+	u64 reg = HISI_PCIE_INIT_SET;
+
+	/* Config HISI_PCIE_EVENT_CTRL according to event. */
+	reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
+
+	/* Config HISI_PCIE_EVENT_CTRL according to root port or EP device. */
+	port = hisi_pcie_get_port(event);
+	if (port)
+		reg |= FIELD_PREP(HISI_PCIE_TARGET_M, port);
+	else
+		reg |= HISI_PCIE_TARGET_EN |
+		       FIELD_PREP(HISI_PCIE_TARGET_M, hisi_pcie_get_bdf(event));
+
+	/* Config HISI_PCIE_EVENT_CTRL according to trigger condition. */
+	trig_len = hisi_pcie_get_trig_len(event);
+	if (trig_len) {
+		reg |= FIELD_PREP(HISI_PCIE_TRIG_M, trig_len);
+		reg |= FIELD_PREP(HISI_PCIE_TRIG_MODE_M, hisi_pcie_get_trig_mode(event));
+		reg |= HISI_PCIE_TRIG_EN;
+	}
+
+	/* Config HISI_PCIE_EVENT_CTRL according to threshold condition. */
+	thr_len = hisi_pcie_get_thr_len(event);
+	if (thr_len) {
+		reg |= FIELD_PREP(HISI_PCIE_THR_M, thr_len);
+		reg |= FIELD_PREP(HISI_PCIE_THR_MODE_M, hisi_pcie_get_thr_mode(event));
+		reg |= HISI_PCIE_THR_EN;
+	}
+
+	len_mode = hisi_pcie_get_len_mode(event);
+	if (len_mode)
+		reg |= FIELD_PREP(HISI_PCIE_LEN_M, len_mode);
+	else
+		reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
+
+	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
+}
+
+static void hisi_pcie_pmu_clear_filter(struct perf_event *event)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, HISI_PCIE_INIT_SET);
+}
+
+static bool hisi_pcie_pmu_valid_requester_id(struct hisi_pcie_pmu *pcie_pmu, u32 bdf)
+{
+	struct pci_dev *root_port, *pdev;
+	u16 rp_bdf;
+
+	pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pcie_pmu->pdev->bus), PCI_BUS_NUM(bdf),
+					   GET_PCI_DEVFN(bdf));
+	if (!pdev)
+		return false;
+
+	root_port = pcie_find_root_port(pdev);
+	if (!root_port) {
+		pci_dev_put(pdev);
+		return false;
+	}
+
+	pci_dev_put(pdev);
+	rp_bdf = pci_dev_id(root_port);
+	return rp_bdf >= pcie_pmu->bdf_min && rp_bdf <= pcie_pmu->bdf_max;
+}
+
+static bool hisi_pcie_pmu_valid_filter(struct perf_event *event,
+				       struct hisi_pcie_pmu *pcie_pmu)
+{
+	u32 requester_id = hisi_pcie_get_bdf(event);
+
+	if (hisi_pcie_get_thr_len(event) > HISI_PCIE_THR_MAX_VAL)
+		return false;
+
+	if (hisi_pcie_get_trig_len(event) > HISI_PCIE_TRIG_MAX_VAL)
+		return false;
+
+	if (requester_id) {
+		if (!hisi_pcie_pmu_valid_requester_id(pcie_pmu, requester_id))
+			return false;
+	}
+
+	return true;
+}
+
+static bool hisi_pcie_pmu_cmp_event(struct perf_event *target,
+					struct perf_event *event)
+{
+	return hisi_pcie_get_real_event(target) == hisi_pcie_get_real_event(event);
+}
+
+static bool hisi_pcie_pmu_validate_event_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct perf_event *event_group[HISI_PCIE_MAX_COUNTERS];
+	int counters = 1;
+	int num;
+
+	event_group[0] = leader;
+	if (!is_software_event(leader)) {
+		if (leader->pmu != event->pmu)
+			return false;
+
+		if (leader != event && !hisi_pcie_pmu_cmp_event(leader, event))
+			event_group[counters++] = event;
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (is_software_event(sibling))
+			continue;
+
+		if (sibling->pmu != event->pmu)
+			return false;
+
+		for (num = 0; num < counters; num++) {
+			if (hisi_pcie_pmu_cmp_event(event_group[num], sibling))
+				break;
+		}
+
+		if (num == counters)
+			event_group[counters++] = sibling;
+	}
+
+	return counters <= HISI_PCIE_MAX_COUNTERS;
+}
+
+static int hisi_pcie_pmu_event_init(struct perf_event *event)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Check the type first before going on, otherwise it's not our event */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	event->cpu = pcie_pmu->on_cpu;
+
+	if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event)))
+		hwc->event_base = HISI_PCIE_EXT_CNT;
+	else
+		hwc->event_base = HISI_PCIE_CNT;
+
+	/* Sampling is not supported. */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	if (!hisi_pcie_pmu_valid_filter(event, pcie_pmu))
+		return -EINVAL;
+
+	if (!hisi_pcie_pmu_validate_event_group(event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static u64 hisi_pcie_pmu_read_counter(struct perf_event *event)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	u32 idx = event->hw.idx;
+
+	return hisi_pcie_pmu_readq(pcie_pmu, event->hw.event_base, idx);
+}
+
+static int hisi_pcie_pmu_find_related_event(struct hisi_pcie_pmu *pcie_pmu,
+					    struct perf_event *event)
+{
+	struct perf_event *sibling;
+	int idx;
+
+	for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
+		sibling = pcie_pmu->hw_events[idx];
+		if (!sibling)
+			continue;
+
+		if (!hisi_pcie_pmu_cmp_event(sibling, event))
+			continue;
+
+		/* Related events must be used in group */
+		if (sibling->group_leader == event->group_leader)
+			return idx;
+		else
+			return -EINVAL;
+	}
+
+	return idx;
+}
+
+static int hisi_pcie_pmu_get_event_idx(struct hisi_pcie_pmu *pcie_pmu)
+{
+	int idx;
+
+	for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
+		if (!pcie_pmu->hw_events[idx])
+			return idx;
+	}
+
+	return -EINVAL;
+}
+
+static void hisi_pcie_pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 new_cnt, prev_cnt, delta;
+
+	do {
+		prev_cnt = local64_read(&hwc->prev_count);
+		new_cnt = hisi_pcie_pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_cnt,
+				 new_cnt) != prev_cnt);
+
+	delta = (new_cnt - prev_cnt) & HISI_PCIE_MAX_PERIOD;
+	local64_add(delta, &event->count);
+}
+
+static void hisi_pcie_pmu_read(struct perf_event *event)
+{
+	hisi_pcie_pmu_event_update(event);
+}
+
+static void hisi_pcie_pmu_set_period(struct perf_event *event)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	local64_set(&hwc->prev_count, HISI_PCIE_INIT_VAL);
+	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_CNT, idx, HISI_PCIE_INIT_VAL);
+	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EXT_CNT, idx, HISI_PCIE_INIT_VAL);
+}
+
+static void hisi_pcie_pmu_enable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u64 val;
+
+	val = hisi_pcie_pmu_readq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx);
+	val |= HISI_PCIE_EVENT_EN;
+	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, val);
+}
+
+static void hisi_pcie_pmu_disable_counter(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u64 val;
+
+	val = hisi_pcie_pmu_readq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx);
+	val &= ~HISI_PCIE_EVENT_EN;
+	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, val);
+}
+
+static void hisi_pcie_pmu_enable_int(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+
+	hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_MASK, idx, 0);
+}
+
+static void hisi_pcie_pmu_disable_int(struct hisi_pcie_pmu *pcie_pmu, struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+
+	hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_MASK, idx, 1);
+}
+
+static void hisi_pcie_pmu_reset_counter(struct hisi_pcie_pmu *pcie_pmu, int idx)
+{
+	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, HISI_PCIE_RESET_CNT);
+	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, idx, HISI_PCIE_INIT_SET);
+}
+
+static void hisi_pcie_pmu_start(struct perf_event *event, int flags)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u64 prev_cnt;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	hisi_pcie_pmu_config_filter(event);
+	hisi_pcie_pmu_enable_counter(pcie_pmu, hwc);
+	hisi_pcie_pmu_enable_int(pcie_pmu, hwc);
+	hisi_pcie_pmu_set_period(event);
+
+	if (flags & PERF_EF_RELOAD) {
+		prev_cnt = local64_read(&hwc->prev_count);
+		hisi_pcie_pmu_writeq(pcie_pmu, hwc->event_base, idx, prev_cnt);
+	}
+
+	perf_event_update_userpage(event);
+}
+
+static void hisi_pcie_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_pcie_pmu_event_update(event);
+	hisi_pcie_pmu_disable_int(pcie_pmu, hwc);
+	hisi_pcie_pmu_disable_counter(pcie_pmu, hwc);
+	hisi_pcie_pmu_clear_filter(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int hisi_pcie_pmu_add(struct perf_event *event, int flags)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	/* Check all working events to find a related event. */
+	idx = hisi_pcie_pmu_find_related_event(pcie_pmu, event);
+	if (idx < 0)
+		return idx;
+
+	/* Current event shares an enabled counter with the related event */
+	if (idx < HISI_PCIE_MAX_COUNTERS) {
+		hwc->idx = idx;
+		goto start_count;
+	}
+
+	idx = hisi_pcie_pmu_get_event_idx(pcie_pmu);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	pcie_pmu->hw_events[idx] = event;
+	/* Reset Counter to avoid previous statistic interference. */
+	hisi_pcie_pmu_reset_counter(pcie_pmu, idx);
+
+start_count:
+	if (flags & PERF_EF_START)
+		hisi_pcie_pmu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void hisi_pcie_pmu_del(struct perf_event *event, int flags)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_pcie_pmu_stop(event, PERF_EF_UPDATE);
+	pcie_pmu->hw_events[hwc->idx] = NULL;
+	perf_event_update_userpage(event);
+}
+
+static void hisi_pcie_pmu_enable(struct pmu *pmu)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(pmu);
+	int num;
+
+	for (num = 0; num < HISI_PCIE_MAX_COUNTERS; num++) {
+		if (pcie_pmu->hw_events[num])
+			break;
+	}
+
+	if (num == HISI_PCIE_MAX_COUNTERS)
+		return;
+
+	writel(HISI_PCIE_GLOBAL_EN, pcie_pmu->base + HISI_PCIE_GLOBAL_CTRL);
+}
+
+static void hisi_pcie_pmu_disable(struct pmu *pmu)
+{
+	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(pmu);
+
+	writel(HISI_PCIE_GLOBAL_NONE, pcie_pmu->base + HISI_PCIE_GLOBAL_CTRL);
+}
+
+static irqreturn_t hisi_pcie_pmu_irq(int irq, void *data)
+{
+	struct hisi_pcie_pmu *pcie_pmu = data;
+	irqreturn_t ret = IRQ_NONE;
+	struct perf_event *event;
+	u32 overflown;
+	int idx;
+
+	for (idx = 0; idx < HISI_PCIE_MAX_COUNTERS; idx++) {
+		overflown = hisi_pcie_pmu_readl(pcie_pmu, HISI_PCIE_INT_STAT, idx);
+		if (!overflown)
+			continue;
+
+		/* Clear status of interrupt. */
+		hisi_pcie_pmu_writel(pcie_pmu, HISI_PCIE_INT_STAT, idx, 1);
+		event = pcie_pmu->hw_events[idx];
+		if (!event)
+			continue;
+
+		hisi_pcie_pmu_event_update(event);
+		hisi_pcie_pmu_set_period(event);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static int hisi_pcie_pmu_irq_register(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu)
+{
+	int irq, ret;
+
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+	if (ret < 0) {
+		pci_err(pdev, "Failed to enable MSI vectors: %d\n", ret);
+		return ret;
+	}
+
+	irq = pci_irq_vector(pdev, 0);
+	ret = request_irq(irq, hisi_pcie_pmu_irq, IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME,
+			  pcie_pmu);
+	if (ret) {
+		pci_err(pdev, "Failed to register IRQ: %d\n", ret);
+		pci_free_irq_vectors(pdev);
+		return ret;
+	}
+
+	pcie_pmu->irq = irq;
+
+	return 0;
+}
+
+static void hisi_pcie_pmu_irq_unregister(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu)
+{
+	free_irq(pcie_pmu->irq, pcie_pmu);
+	pci_free_irq_vectors(pdev);
+}
+
+static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
+
+	if (pcie_pmu->on_cpu == -1) {
+		pcie_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(&pcie_pmu->pdev->dev));
+		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(pcie_pmu->on_cpu)));
+	}
+
+	return 0;
+}
+
+static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
+	unsigned int target;
+	cpumask_t mask;
+	int numa_node;
+
+	/* Nothing to do if this CPU doesn't own the PMU */
+	if (pcie_pmu->on_cpu != cpu)
+		return 0;
+
+	pcie_pmu->on_cpu = -1;
+
+	/* Choose a local CPU from all online cpus. */
+	numa_node = dev_to_node(&pcie_pmu->pdev->dev);
+	if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) &&
+	    cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+		target = cpumask_any(&mask);
+	else
+		target = cpumask_any_but(cpu_online_mask, cpu);
+
+	if (target >= nr_cpu_ids) {
+		pci_err(pcie_pmu->pdev, "There is no CPU to set\n");
+		return 0;
+	}
+
+	perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
+	/* Use this CPU for event counting */
+	pcie_pmu->on_cpu = target;
+	WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(target)));
+
+	return 0;
+}
+
+static struct attribute *hisi_pcie_pmu_events_attr[] = {
+	HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_latency, 0x0010),
+	HISI_PCIE_PMU_EVENT_ATTR(rx_mwr_cnt, 0x10010),
+	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_latency, 0x0210),
+	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210),
+	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011),
+	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011),
+	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804),
+	HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804),
+	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405),
+	HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405),
+	NULL
+};
+
+static struct attribute_group hisi_pcie_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_pcie_pmu_events_attr,
+};
+
+static struct attribute *hisi_pcie_pmu_format_attr[] = {
+	HISI_PCIE_PMU_FORMAT_ATTR(event, "config:0-16"),
+	HISI_PCIE_PMU_FORMAT_ATTR(thr_len, "config1:0-3"),
+	HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"),
+	HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"),
+	HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"),
+	HISI_PCIE_PMU_FORMAT_ATTR(len_mode, "config1:10-11"),
+	HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"),
+	HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"),
+	NULL
+};
+
+static const struct attribute_group hisi_pcie_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_pcie_pmu_format_attr,
+};
+
+static struct attribute *hisi_pcie_pmu_bus_attrs[] = {
+	&dev_attr_bus.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_pcie_pmu_bus_attr_group = {
+	.attrs = hisi_pcie_pmu_bus_attrs,
+};
+
+static struct attribute *hisi_pcie_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_pcie_pmu_cpumask_attr_group = {
+	.attrs = hisi_pcie_pmu_cpumask_attrs,
+};
+
+static struct attribute *hisi_pcie_pmu_identifier_attrs[] = {
+	&dev_attr_identifier.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_pcie_pmu_identifier_attr_group = {
+	.attrs = hisi_pcie_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_pcie_pmu_attr_groups[] = {
+	&hisi_pcie_pmu_events_group,
+	&hisi_pcie_pmu_format_group,
+	&hisi_pcie_pmu_bus_attr_group,
+	&hisi_pcie_pmu_cpumask_attr_group,
+	&hisi_pcie_pmu_identifier_attr_group,
+	NULL
+};
+
+static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu)
+{
+	struct hisi_pcie_reg_pair regs;
+	u16 sicl_id, core_id;
+	char *name;
+
+	regs = hisi_pcie_parse_reg_value(pcie_pmu, HISI_PCIE_REG_BDF);
+	pcie_pmu->bdf_min = regs.lo;
+	pcie_pmu->bdf_max = regs.hi;
+
+	regs = hisi_pcie_parse_reg_value(pcie_pmu, HISI_PCIE_REG_INFO);
+	sicl_id = regs.hi;
+	core_id = regs.lo;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_pcie%u_core%u", sicl_id, core_id);
+	if (!name)
+		return -ENOMEM;
+
+	pcie_pmu->pdev = pdev;
+	pcie_pmu->on_cpu = -1;
+	pcie_pmu->identifier = readl(pcie_pmu->base + HISI_PCIE_REG_VERSION);
+	pcie_pmu->pmu = (struct pmu) {
+		.name		= name,
+		.module		= THIS_MODULE,
+		.event_init	= hisi_pcie_pmu_event_init,
+		.pmu_enable	= hisi_pcie_pmu_enable,
+		.pmu_disable	= hisi_pcie_pmu_disable,
+		.add		= hisi_pcie_pmu_add,
+		.del		= hisi_pcie_pmu_del,
+		.start		= hisi_pcie_pmu_start,
+		.stop		= hisi_pcie_pmu_stop,
+		.read		= hisi_pcie_pmu_read,
+		.task_ctx_nr	= perf_invalid_context,
+		.attr_groups	= hisi_pcie_pmu_attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	return 0;
+}
+
+static int hisi_pcie_init_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_pmu)
+{
+	int ret;
+
+	pcie_pmu->base = pci_ioremap_bar(pdev, 2);
+	if (!pcie_pmu->base) {
+		pci_err(pdev, "Ioremap failed for pcie_pmu resource\n");
+		return -ENOMEM;
+	}
+
+	ret = hisi_pcie_alloc_pmu(pdev, pcie_pmu);
+	if (ret)
+		goto err_iounmap;
+
+	ret = hisi_pcie_pmu_irq_register(pdev, pcie_pmu);
+	if (ret)
+		goto err_iounmap;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node);
+	if (ret) {
+		pci_err(pdev, "Failed to register hotplug: %d\n", ret);
+		goto err_irq_unregister;
+	}
+
+	ret = perf_pmu_register(&pcie_pmu->pmu, pcie_pmu->pmu.name, -1);
+	if (ret) {
+		pci_err(pdev, "Failed to register PCIe PMU: %d\n", ret);
+		goto err_hotplug_unregister;
+	}
+
+	return ret;
+
+err_hotplug_unregister:
+	cpuhp_state_remove_instance_nocalls(
+		CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node);
+
+err_irq_unregister:
+	hisi_pcie_pmu_irq_unregister(pdev, pcie_pmu);
+
+err_iounmap:
+	iounmap(pcie_pmu->base);
+
+	return ret;
+}
+
+static void hisi_pcie_uninit_pmu(struct pci_dev *pdev)
+{
+	struct hisi_pcie_pmu *pcie_pmu = pci_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pcie_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(
+		CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node);
+	hisi_pcie_pmu_irq_unregister(pdev, pcie_pmu);
+	iounmap(pcie_pmu->base);
+}
+
+static int hisi_pcie_init_dev(struct pci_dev *pdev)
+{
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		pci_err(pdev, "Failed to enable PCI device: %d\n", ret);
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
+	if (ret < 0) {
+		pci_err(pdev, "Failed to request PCI mem regions: %d\n", ret);
+		return ret;
+	}
+
+	pci_set_master(pdev);
+
+	return 0;
+}
+
+static int hisi_pcie_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct hisi_pcie_pmu *pcie_pmu;
+	int ret;
+
+	pcie_pmu = devm_kzalloc(&pdev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
+	if (!pcie_pmu)
+		return -ENOMEM;
+
+	ret = hisi_pcie_init_dev(pdev);
+	if (ret)
+		return ret;
+
+	ret = hisi_pcie_init_pmu(pdev, pcie_pmu);
+	if (ret)
+		return ret;
+
+	pci_set_drvdata(pdev, pcie_pmu);
+
+	return ret;
+}
+
+static void hisi_pcie_pmu_remove(struct pci_dev *pdev)
+{
+	hisi_pcie_uninit_pmu(pdev);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static const struct pci_device_id hisi_pcie_pmu_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12d) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, hisi_pcie_pmu_ids);
+
+static struct pci_driver hisi_pcie_pmu_driver = {
+	.name = DRV_NAME,
+	.id_table = hisi_pcie_pmu_ids,
+	.probe = hisi_pcie_pmu_probe,
+	.remove = hisi_pcie_pmu_remove,
+};
+
+static int __init hisi_pcie_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE,
+				      "AP_PERF_ARM_HISI_PCIE_PMU_ONLINE",
+				      hisi_pcie_pmu_online_cpu,
+				      hisi_pcie_pmu_offline_cpu);
+	if (ret) {
+		pr_err("Failed to setup PCIe PMU hotplug: %d\n", ret);
+		return ret;
+	}
+
+	ret = pci_register_driver(&hisi_pcie_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE);
+
+	return ret;
+}
+module_init(hisi_pcie_module_init);
+
+static void __exit hisi_pcie_module_exit(void)
+{
+	pci_unregister_driver(&hisi_pcie_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE);
+}
+module_exit(hisi_pcie_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon PCIe PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
new file mode 100644
index 0000000000..40f1bc9f9b
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC CPA(Coherency Protocol Agent) hardware event counters support
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ * Author: Qi Liu <liuqi115@huawei.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+
+#define pr_fmt(fmt) "cpa pmu: " fmt
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* CPA register definition */
+#define CPA_PERF_CTRL		0x1c00
+#define CPA_EVENT_CTRL		0x1c04
+#define CPA_INT_MASK		0x1c70
+#define CPA_INT_STATUS		0x1c78
+#define CPA_INT_CLEAR		0x1c7c
+#define CPA_EVENT_TYPE0		0x1c80
+#define CPA_VERSION		0x1cf0
+#define CPA_CNT0_LOWER		0x1d00
+#define CPA_CFG_REG		0x0534
+
+/* CPA operation command */
+#define CPA_PERF_CTRL_EN	BIT_ULL(0)
+#define CPA_EVTYPE_MASK		0xffUL
+#define CPA_PM_CTRL		BIT_ULL(9)
+
+/* CPA has 8-counters */
+#define CPA_NR_COUNTERS		0x8
+#define CPA_COUNTER_BITS	64
+#define CPA_NR_EVENTS		0xff
+#define CPA_REG_OFFSET		0x8
+
+static u32 hisi_cpa_pmu_get_counter_offset(int idx)
+{
+	return (CPA_CNT0_LOWER + idx * CPA_REG_OFFSET);
+}
+
+static u64 hisi_cpa_pmu_read_counter(struct hisi_pmu *cpa_pmu,
+				     struct hw_perf_event *hwc)
+{
+	return readq(cpa_pmu->base + hisi_cpa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_cpa_pmu_write_counter(struct hisi_pmu *cpa_pmu,
+				       struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, cpa_pmu->base + hisi_cpa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_cpa_pmu_write_evtype(struct hisi_pmu *cpa_pmu, int idx,
+				      u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register(CPA_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Event code is 8-bits and for the former 4 hardware counters,
+	 * CPA_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
+	 * CPA_EVENT_TYPE1 is chosen.
+	 */
+	reg = CPA_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = CPA_REG_OFFSET * reg_idx;
+
+	/* Write event code to CPA_EVENT_TYPEx Register */
+	val = readl(cpa_pmu->base + reg);
+	val &= ~(CPA_EVTYPE_MASK << shift);
+	val |= type << shift;
+	writel(val, cpa_pmu->base + reg);
+}
+
+static void hisi_cpa_pmu_start_counters(struct hisi_pmu *cpa_pmu)
+{
+	u32 val;
+
+	val = readl(cpa_pmu->base + CPA_PERF_CTRL);
+	val |= CPA_PERF_CTRL_EN;
+	writel(val, cpa_pmu->base + CPA_PERF_CTRL);
+}
+
+static void hisi_cpa_pmu_stop_counters(struct hisi_pmu *cpa_pmu)
+{
+	u32 val;
+
+	val = readl(cpa_pmu->base + CPA_PERF_CTRL);
+	val &= ~(CPA_PERF_CTRL_EN);
+	writel(val, cpa_pmu->base + CPA_PERF_CTRL);
+}
+
+static void hisi_cpa_pmu_disable_pm(struct hisi_pmu *cpa_pmu)
+{
+	u32 val;
+
+	val = readl(cpa_pmu->base + CPA_CFG_REG);
+	val |= CPA_PM_CTRL;
+	writel(val, cpa_pmu->base + CPA_CFG_REG);
+}
+
+static void hisi_cpa_pmu_enable_pm(struct hisi_pmu *cpa_pmu)
+{
+	u32 val;
+
+	val = readl(cpa_pmu->base + CPA_CFG_REG);
+	val &= ~(CPA_PM_CTRL);
+	writel(val, cpa_pmu->base + CPA_CFG_REG);
+}
+
+static void hisi_cpa_pmu_enable_counter(struct hisi_pmu *cpa_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index in CPA_EVENT_CTRL register */
+	val = readl(cpa_pmu->base + CPA_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, cpa_pmu->base + CPA_EVENT_CTRL);
+}
+
+static void hisi_cpa_pmu_disable_counter(struct hisi_pmu *cpa_pmu,
+					 struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index in CPA_EVENT_CTRL register */
+	val = readl(cpa_pmu->base + CPA_EVENT_CTRL);
+	val &= ~(1UL << hwc->idx);
+	writel(val, cpa_pmu->base + CPA_EVENT_CTRL);
+}
+
+static void hisi_cpa_pmu_enable_counter_int(struct hisi_pmu *cpa_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 0 to enable interrupt */
+	val = readl(cpa_pmu->base + CPA_INT_MASK);
+	val &= ~(1UL << hwc->idx);
+	writel(val, cpa_pmu->base + CPA_INT_MASK);
+}
+
+static void hisi_cpa_pmu_disable_counter_int(struct hisi_pmu *cpa_pmu,
+					     struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 1 to mask interrupt */
+	val = readl(cpa_pmu->base + CPA_INT_MASK);
+	val |= 1 << hwc->idx;
+	writel(val, cpa_pmu->base + CPA_INT_MASK);
+}
+
+static u32 hisi_cpa_pmu_get_int_status(struct hisi_pmu *cpa_pmu)
+{
+	return readl(cpa_pmu->base + CPA_INT_STATUS);
+}
+
+static void hisi_cpa_pmu_clear_int_status(struct hisi_pmu *cpa_pmu, int idx)
+{
+	writel(1 << idx, cpa_pmu->base + CPA_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_cpa_pmu_acpi_match[] = {
+	{ "HISI0281", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match);
+
+static int hisi_cpa_pmu_init_data(struct platform_device *pdev,
+				  struct hisi_pmu *cpa_pmu)
+{
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &cpa_pmu->sicl_id)) {
+		dev_err(&pdev->dev, "Can not read sicl-id\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+				     &cpa_pmu->index_id)) {
+		dev_err(&pdev->dev, "Cannot read idx-id\n");
+		return -EINVAL;
+	}
+
+	cpa_pmu->ccl_id = -1;
+	cpa_pmu->sccl_id = -1;
+	cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(cpa_pmu->base))
+		return PTR_ERR(cpa_pmu->base);
+
+	cpa_pmu->identifier = readl(cpa_pmu->base + CPA_VERSION);
+
+	return 0;
+}
+
+static struct attribute *hisi_cpa_pmu_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-15"),
+	NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_cpa_pmu_format_attr,
+};
+
+static struct attribute *hisi_cpa_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(cpa_cycles,		0x00),
+	HISI_PMU_EVENT_ATTR(cpa_p1_wr_dat,	0x61),
+	HISI_PMU_EVENT_ATTR(cpa_p1_rd_dat,	0x62),
+	HISI_PMU_EVENT_ATTR(cpa_p0_wr_dat,	0xE1),
+	HISI_PMU_EVENT_ATTR(cpa_p0_rd_dat,	0xE2),
+	NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_cpa_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = {
+	.attrs = hisi_cpa_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_cpa_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_cpa_pmu_identifier_attrs[] = {
+	&hisi_cpa_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_cpa_pmu_identifier_group = {
+	.attrs = hisi_cpa_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = {
+	&hisi_cpa_pmu_format_group,
+	&hisi_cpa_pmu_events_group,
+	&hisi_cpa_pmu_cpumask_attr_group,
+	&hisi_cpa_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_cpa_pmu_ops = {
+	.write_evtype           = hisi_cpa_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_cpa_pmu_start_counters,
+	.stop_counters		= hisi_cpa_pmu_stop_counters,
+	.enable_counter		= hisi_cpa_pmu_enable_counter,
+	.disable_counter	= hisi_cpa_pmu_disable_counter,
+	.enable_counter_int	= hisi_cpa_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_cpa_pmu_disable_counter_int,
+	.write_counter		= hisi_cpa_pmu_write_counter,
+	.read_counter		= hisi_cpa_pmu_read_counter,
+	.get_int_status		= hisi_cpa_pmu_get_int_status,
+	.clear_int_status	= hisi_cpa_pmu_clear_int_status,
+};
+
+static int hisi_cpa_pmu_dev_probe(struct platform_device *pdev,
+				  struct hisi_pmu *cpa_pmu)
+{
+	int ret;
+
+	ret = hisi_cpa_pmu_init_data(pdev, cpa_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(cpa_pmu, pdev);
+	if (ret)
+		return ret;
+
+	cpa_pmu->counter_bits = CPA_COUNTER_BITS;
+	cpa_pmu->check_event = CPA_NR_EVENTS;
+	cpa_pmu->pmu_events.attr_groups = hisi_cpa_pmu_attr_groups;
+	cpa_pmu->ops = &hisi_uncore_cpa_pmu_ops;
+	cpa_pmu->num_counters = CPA_NR_COUNTERS;
+	cpa_pmu->dev = &pdev->dev;
+	cpa_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_cpa_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *cpa_pmu;
+	char *name;
+	int ret;
+
+	cpa_pmu = devm_kzalloc(&pdev->dev, sizeof(*cpa_pmu), GFP_KERNEL);
+	if (!cpa_pmu)
+		return -ENOMEM;
+
+	ret = hisi_cpa_pmu_dev_probe(pdev, cpa_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u",
+			      cpa_pmu->sicl_id, cpa_pmu->index_id);
+	if (!name)
+		return -ENOMEM;
+
+	hisi_pmu_init(cpa_pmu, THIS_MODULE);
+
+	/* Power Management should be disabled before using CPA PMU. */
+	hisi_cpa_pmu_disable_pm(cpa_pmu);
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+				       &cpa_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		hisi_cpa_pmu_enable_pm(cpa_pmu);
+		return ret;
+	}
+
+	ret = perf_pmu_register(&cpa_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(cpa_pmu->dev, "PMU register failed\n");
+		cpuhp_state_remove_instance_nocalls(
+			CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, &cpa_pmu->node);
+		hisi_cpa_pmu_enable_pm(cpa_pmu);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, cpa_pmu);
+	return ret;
+}
+
+static int hisi_cpa_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *cpa_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&cpa_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+					    &cpa_pmu->node);
+	hisi_cpa_pmu_enable_pm(cpa_pmu);
+	return 0;
+}
+
+static struct platform_driver hisi_cpa_pmu_driver = {
+	.driver = {
+		.name = "hisi_cpa_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_cpa_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_cpa_pmu_probe,
+	.remove = hisi_cpa_pmu_remove,
+};
+
+static int __init hisi_cpa_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE,
+				      "AP_PERF_ARM_HISI_CPA_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("setup hotplug failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_cpa_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE);
+
+	return ret;
+}
+module_init(hisi_cpa_pmu_module_init);
+
+static void __exit hisi_cpa_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_cpa_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE);
+}
+module_exit(hisi_cpa_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
new file mode 100644
index 0000000000..ffb039d05d
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC DDRC uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 HiSilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *         Anurup M <anurup.m@huawei.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* DDRC register definition in v1 */
+#define DDRC_PERF_CTRL		0x010
+#define DDRC_FLUX_WR		0x380
+#define DDRC_FLUX_RD		0x384
+#define DDRC_FLUX_WCMD          0x388
+#define DDRC_FLUX_RCMD          0x38c
+#define DDRC_PRE_CMD            0x3c0
+#define DDRC_ACT_CMD            0x3c4
+#define DDRC_RNK_CHG            0x3cc
+#define DDRC_RW_CHG             0x3d0
+#define DDRC_EVENT_CTRL         0x6C0
+#define DDRC_INT_MASK		0x6c8
+#define DDRC_INT_STATUS		0x6cc
+#define DDRC_INT_CLEAR		0x6d0
+#define DDRC_VERSION		0x710
+
+/* DDRC register definition in v2 */
+#define DDRC_V2_INT_MASK	0x528
+#define DDRC_V2_INT_STATUS	0x52c
+#define DDRC_V2_INT_CLEAR	0x530
+#define DDRC_V2_EVENT_CNT	0xe00
+#define DDRC_V2_EVENT_CTRL	0xe70
+#define DDRC_V2_EVENT_TYPE	0xe74
+#define DDRC_V2_PERF_CTRL	0xeA0
+
+/* DDRC has 8-counters */
+#define DDRC_NR_COUNTERS	0x8
+#define DDRC_V1_PERF_CTRL_EN	0x2
+#define DDRC_V2_PERF_CTRL_EN	0x1
+#define DDRC_V1_NR_EVENTS	0x7
+#define DDRC_V2_NR_EVENTS	0x90
+
+/*
+ * For PMU v1, there are eight-events and every event has been mapped
+ * to fixed-purpose counters which register offset is not consistent.
+ * Therefore there is no write event type and we assume that event
+ * code (0 to 7) is equal to counter index in PMU driver.
+ */
+#define GET_DDRC_EVENTID(hwc)	(hwc->config_base & 0x7)
+
+static const u32 ddrc_reg_off[] = {
+	DDRC_FLUX_WR, DDRC_FLUX_RD, DDRC_FLUX_WCMD, DDRC_FLUX_RCMD,
+	DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_RNK_CHG, DDRC_RW_CHG
+};
+
+/*
+ * Select the counter register offset using the counter index.
+ * In PMU v1, there are no programmable counter, the count
+ * is read form the statistics counter register itself.
+ */
+static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx)
+{
+	return ddrc_reg_off[cntr_idx];
+}
+
+static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx)
+{
+	return DDRC_V2_EVENT_CNT + cntr_idx * 8;
+}
+
+static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu,
+				      struct hw_perf_event *hwc)
+{
+	return readl(ddrc_pmu->base +
+		     hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx));
+}
+
+static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu,
+					struct hw_perf_event *hwc, u64 val)
+{
+	writel((u32)val,
+	       ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx));
+}
+
+static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu,
+					 struct hw_perf_event *hwc)
+{
+	return readq(ddrc_pmu->base +
+		     hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx));
+}
+
+static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu,
+					   struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val,
+	       ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx));
+}
+
+/*
+ * For DDRC PMU v1, event has been mapped to fixed-purpose counter by hardware,
+ * so there is no need to write event type, while it is programmable counter in
+ * PMU v2.
+ */
+static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
+				       u32 type)
+{
+	u32 offset;
+
+	if (hha_pmu->identifier >= HISI_PMU_V2) {
+		offset = DDRC_V2_EVENT_TYPE + 4 * idx;
+		writel(type, hha_pmu->base + offset);
+	}
+}
+
+static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu)
+{
+	u32 val;
+
+	/* Set perf_enable in DDRC_PERF_CTRL to start event counting */
+	val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
+	val |= DDRC_V1_PERF_CTRL_EN;
+	writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
+}
+
+static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu)
+{
+	u32 val;
+
+	/* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */
+	val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
+	val &= ~DDRC_V1_PERF_CTRL_EN;
+	writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
+}
+
+static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Set counter index(event code) in DDRC_EVENT_CTRL register */
+	val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL);
+	val |= (1 << GET_DDRC_EVENTID(hwc));
+	writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
+}
+
+static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu,
+					     struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index(event code) in DDRC_EVENT_CTRL register */
+	val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL);
+	val &= ~(1 << GET_DDRC_EVENTID(hwc));
+	writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
+}
+
+static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event)
+{
+	struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu);
+	unsigned long *used_mask = ddrc_pmu->pmu_events.used_mask;
+	struct hw_perf_event *hwc = &event->hw;
+	/* For DDRC PMU, we use event code as counter index */
+	int idx = GET_DDRC_EVENTID(hwc);
+
+	if (test_bit(idx, used_mask))
+		return -EAGAIN;
+
+	set_bit(idx, used_mask);
+
+	return idx;
+}
+
+static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event)
+{
+	return hisi_uncore_pmu_get_event_idx(event);
+}
+
+static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+	val |= DDRC_V2_PERF_CTRL_EN;
+	writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+}
+
+static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+	val &= ~DDRC_V2_PERF_CTRL_EN;
+	writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+}
+
+static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+}
+
+static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu,
+					     struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+}
+
+static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+						struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 0 to enable interrupt */
+	val = readl(ddrc_pmu->base + DDRC_INT_MASK);
+	val &= ~(1 << hwc->idx);
+	writel(val, ddrc_pmu->base + DDRC_INT_MASK);
+}
+
+static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu,
+						 struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 1 to mask interrupt */
+	val = readl(ddrc_pmu->base + DDRC_INT_MASK);
+	val |= 1 << hwc->idx;
+	writel(val, ddrc_pmu->base + DDRC_INT_MASK);
+}
+
+static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+						struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK);
+	val &= ~(1 << hwc->idx);
+	writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK);
+}
+
+static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu,
+						struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK);
+	val |= 1 << hwc->idx;
+	writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK);
+}
+
+static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu)
+{
+	return readl(ddrc_pmu->base + DDRC_INT_STATUS);
+}
+
+static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu,
+					      int idx)
+{
+	writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR);
+}
+
+static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu)
+{
+	return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS);
+}
+
+static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu,
+					      int idx)
+{
+	writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = {
+	{ "HISI0233", },
+	{ "HISI0234", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
+
+static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
+				   struct hisi_pmu *ddrc_pmu)
+{
+	/*
+	 * Use the SCCL_ID and DDRC channel ID to identify the
+	 * DDRC PMU, while SCCL_ID is in MPIDR[aff2].
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id",
+				     &ddrc_pmu->index_id)) {
+		dev_err(&pdev->dev, "Can not read ddrc channel-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &ddrc_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n");
+		return -EINVAL;
+	}
+	/* DDRC PMUs only share the same SCCL */
+	ddrc_pmu->ccl_id = -1;
+
+	ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ddrc_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n");
+		return PTR_ERR(ddrc_pmu->base);
+	}
+
+	ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION);
+	if (ddrc_pmu->identifier >= HISI_PMU_V2) {
+		if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id",
+					     &ddrc_pmu->sub_id)) {
+			dev_err(&pdev->dev, "Can not read sub-id!\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static struct attribute *hisi_ddrc_pmu_v1_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-4"),
+	NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_v1_format_group = {
+	.name = "format",
+	.attrs = hisi_ddrc_pmu_v1_format_attr,
+};
+
+static struct attribute *hisi_ddrc_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	NULL
+};
+
+static const struct attribute_group hisi_ddrc_pmu_v2_format_group = {
+	.name = "format",
+	.attrs = hisi_ddrc_pmu_v2_format_attr,
+};
+
+static struct attribute *hisi_ddrc_pmu_v1_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(flux_wr,		0x00),
+	HISI_PMU_EVENT_ATTR(flux_rd,		0x01),
+	HISI_PMU_EVENT_ATTR(flux_wcmd,		0x02),
+	HISI_PMU_EVENT_ATTR(flux_rcmd,		0x03),
+	HISI_PMU_EVENT_ATTR(pre_cmd,		0x04),
+	HISI_PMU_EVENT_ATTR(act_cmd,		0x05),
+	HISI_PMU_EVENT_ATTR(rnk_chg,		0x06),
+	HISI_PMU_EVENT_ATTR(rw_chg,		0x07),
+	NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_v1_events_group = {
+	.name = "events",
+	.attrs = hisi_ddrc_pmu_v1_events_attr,
+};
+
+static struct attribute *hisi_ddrc_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(cycles,		0x00),
+	HISI_PMU_EVENT_ATTR(flux_wr,		0x83),
+	HISI_PMU_EVENT_ATTR(flux_rd,		0x84),
+	NULL
+};
+
+static const struct attribute_group hisi_ddrc_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_ddrc_pmu_v2_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = {
+	.attrs = hisi_ddrc_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_ddrc_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = {
+	&hisi_ddrc_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_ddrc_pmu_identifier_group = {
+	.attrs = hisi_ddrc_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = {
+	&hisi_ddrc_pmu_v1_format_group,
+	&hisi_ddrc_pmu_v1_events_group,
+	&hisi_ddrc_pmu_cpumask_attr_group,
+	&hisi_ddrc_pmu_identifier_group,
+	NULL,
+};
+
+static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = {
+	&hisi_ddrc_pmu_v2_format_group,
+	&hisi_ddrc_pmu_v2_events_group,
+	&hisi_ddrc_pmu_cpumask_attr_group,
+	&hisi_ddrc_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = {
+	.write_evtype           = hisi_ddrc_pmu_write_evtype,
+	.get_event_idx		= hisi_ddrc_pmu_v1_get_event_idx,
+	.start_counters		= hisi_ddrc_pmu_v1_start_counters,
+	.stop_counters		= hisi_ddrc_pmu_v1_stop_counters,
+	.enable_counter		= hisi_ddrc_pmu_v1_enable_counter,
+	.disable_counter	= hisi_ddrc_pmu_v1_disable_counter,
+	.enable_counter_int	= hisi_ddrc_pmu_v1_enable_counter_int,
+	.disable_counter_int	= hisi_ddrc_pmu_v1_disable_counter_int,
+	.write_counter		= hisi_ddrc_pmu_v1_write_counter,
+	.read_counter		= hisi_ddrc_pmu_v1_read_counter,
+	.get_int_status		= hisi_ddrc_pmu_v1_get_int_status,
+	.clear_int_status	= hisi_ddrc_pmu_v1_clear_int_status,
+};
+
+static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = {
+	.write_evtype           = hisi_ddrc_pmu_write_evtype,
+	.get_event_idx		= hisi_ddrc_pmu_v2_get_event_idx,
+	.start_counters		= hisi_ddrc_pmu_v2_start_counters,
+	.stop_counters		= hisi_ddrc_pmu_v2_stop_counters,
+	.enable_counter		= hisi_ddrc_pmu_v2_enable_counter,
+	.disable_counter	= hisi_ddrc_pmu_v2_disable_counter,
+	.enable_counter_int	= hisi_ddrc_pmu_v2_enable_counter_int,
+	.disable_counter_int	= hisi_ddrc_pmu_v2_disable_counter_int,
+	.write_counter		= hisi_ddrc_pmu_v2_write_counter,
+	.read_counter		= hisi_ddrc_pmu_v2_read_counter,
+	.get_int_status		= hisi_ddrc_pmu_v2_get_int_status,
+	.clear_int_status	= hisi_ddrc_pmu_v2_clear_int_status,
+};
+
+static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
+				   struct hisi_pmu *ddrc_pmu)
+{
+	int ret;
+
+	ret = hisi_ddrc_pmu_init_data(pdev, ddrc_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(ddrc_pmu, pdev);
+	if (ret)
+		return ret;
+
+	if (ddrc_pmu->identifier >= HISI_PMU_V2) {
+		ddrc_pmu->counter_bits = 48;
+		ddrc_pmu->check_event = DDRC_V2_NR_EVENTS;
+		ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups;
+		ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops;
+	} else {
+		ddrc_pmu->counter_bits = 32;
+		ddrc_pmu->check_event = DDRC_V1_NR_EVENTS;
+		ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups;
+		ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops;
+	}
+
+	ddrc_pmu->num_counters = DDRC_NR_COUNTERS;
+	ddrc_pmu->dev = &pdev->dev;
+	ddrc_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *ddrc_pmu;
+	char *name;
+	int ret;
+
+	ddrc_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddrc_pmu), GFP_KERNEL);
+	if (!ddrc_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ddrc_pmu);
+
+	ret = hisi_ddrc_pmu_dev_probe(pdev, ddrc_pmu);
+	if (ret)
+		return ret;
+
+	if (ddrc_pmu->identifier >= HISI_PMU_V2)
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+				      "hisi_sccl%u_ddrc%u_%u",
+				      ddrc_pmu->sccl_id, ddrc_pmu->index_id,
+				      ddrc_pmu->sub_id);
+	else
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+				      "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id,
+				      ddrc_pmu->index_id);
+
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+				       &ddrc_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret);
+		return ret;
+	}
+
+	hisi_pmu_init(ddrc_pmu, THIS_MODULE);
+
+	ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n");
+		cpuhp_state_remove_instance_nocalls(
+			CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, &ddrc_pmu->node);
+	}
+
+	return ret;
+}
+
+static int hisi_ddrc_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *ddrc_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&ddrc_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+					    &ddrc_pmu->node);
+	return 0;
+}
+
+static struct platform_driver hisi_ddrc_pmu_driver = {
+	.driver = {
+		.name = "hisi_ddrc_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_ddrc_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_ddrc_pmu_probe,
+	.remove = hisi_ddrc_pmu_remove,
+};
+
+static int __init hisi_ddrc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
+				      "AP_PERF_ARM_HISI_DDRC_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("DDRC PMU: setup hotplug, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_ddrc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
+
+	return ret;
+}
+module_init(hisi_ddrc_pmu_module_init);
+
+static void __exit hisi_ddrc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_ddrc_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
+
+}
+module_exit(hisi_ddrc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
new file mode 100644
index 0000000000..15caf99e1e
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC HHA uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 HiSilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *         Anurup M <anurup.m@huawei.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* HHA register definition */
+#define HHA_INT_MASK		0x0804
+#define HHA_INT_STATUS		0x0808
+#define HHA_INT_CLEAR		0x080C
+#define HHA_VERSION		0x1cf0
+#define HHA_PERF_CTRL		0x1E00
+#define HHA_EVENT_CTRL		0x1E04
+#define HHA_SRCID_CTRL		0x1E08
+#define HHA_DATSRC_CTRL		0x1BF0
+#define HHA_EVENT_TYPE0		0x1E80
+/*
+ * If the HW version only supports a 48-bit counter, then
+ * bits [63:48] are reserved, which are Read-As-Zero and
+ * Writes-Ignored.
+ */
+#define HHA_CNT0_LOWER		0x1F00
+
+/* HHA PMU v1 has 16 counters and v2 only has 8 counters */
+#define HHA_V1_NR_COUNTERS	0x10
+#define HHA_V2_NR_COUNTERS	0x8
+
+#define HHA_PERF_CTRL_EN	0x1
+#define HHA_TRACETAG_EN		BIT(31)
+#define HHA_SRCID_EN		BIT(2)
+#define HHA_SRCID_CMD_SHIFT	6
+#define HHA_SRCID_MSK_SHIFT	20
+#define HHA_SRCID_CMD		GENMASK(16, 6)
+#define HHA_SRCID_MSK		GENMASK(30, 20)
+#define HHA_DATSRC_SKT_EN	BIT(23)
+#define HHA_EVTYPE_NONE		0xff
+#define HHA_V1_NR_EVENT		0x65
+#define HHA_V2_NR_EVENT		0xCE
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 22, 22);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 23, 23);
+
+static void hisi_hha_pmu_enable_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(hha_pmu->base + HHA_SRCID_CTRL);
+		val |= HHA_TRACETAG_EN;
+		writel(val, hha_pmu->base + HHA_SRCID_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_clear_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	val = readl(hha_pmu->base + HHA_SRCID_CTRL);
+	val &= ~HHA_TRACETAG_EN;
+	writel(val, hha_pmu->base + HHA_SRCID_CTRL);
+}
+
+static void hisi_hha_pmu_config_ds(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_skt = hisi_get_datasrc_skt(event);
+
+	if (ds_skt) {
+		u32 val;
+
+		val = readl(hha_pmu->base + HHA_DATSRC_CTRL);
+		val |= HHA_DATSRC_SKT_EN;
+		writel(val, hha_pmu->base + HHA_DATSRC_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_clear_ds(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_skt = hisi_get_datasrc_skt(event);
+
+	if (ds_skt) {
+		u32 val;
+
+		val = readl(hha_pmu->base + HHA_DATSRC_CTRL);
+		val &= ~HHA_DATSRC_SKT_EN;
+		writel(val, hha_pmu->base + HHA_DATSRC_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_config_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 val, msk;
+
+		msk = hisi_get_srcid_msk(event);
+		val = readl(hha_pmu->base + HHA_SRCID_CTRL);
+		val |= HHA_SRCID_EN | (cmd << HHA_SRCID_CMD_SHIFT) |
+			(msk << HHA_SRCID_MSK_SHIFT);
+		writel(val, hha_pmu->base + HHA_SRCID_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_disable_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 val;
+
+		val = readl(hha_pmu->base + HHA_SRCID_CTRL);
+		val &= ~(HHA_SRCID_EN | HHA_SRCID_MSK | HHA_SRCID_CMD);
+		writel(val, hha_pmu->base + HHA_SRCID_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_hha_pmu_enable_tracetag(event);
+		hisi_hha_pmu_config_ds(event);
+		hisi_hha_pmu_config_srcid(event);
+	}
+}
+
+static void hisi_hha_pmu_disable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_hha_pmu_disable_srcid(event);
+		hisi_hha_pmu_clear_ds(event);
+		hisi_hha_pmu_clear_tracetag(event);
+	}
+}
+
+/*
+ * Select the counter register offset using the counter index
+ * each counter is 48-bits.
+ */
+static u32 hisi_hha_pmu_get_counter_offset(int cntr_idx)
+{
+	return (HHA_CNT0_LOWER + (cntr_idx * 8));
+}
+
+static u64 hisi_hha_pmu_read_counter(struct hisi_pmu *hha_pmu,
+				     struct hw_perf_event *hwc)
+{
+	/* Read 64 bits and like L3C, top 16 bits are RAZ */
+	return readq(hha_pmu->base + hisi_hha_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_hha_pmu_write_counter(struct hisi_pmu *hha_pmu,
+				       struct hw_perf_event *hwc, u64 val)
+{
+	/* Write 64 bits and like L3C, top 16 bits are WI */
+	writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
+				      u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register(HHA_EVENT_TYPEx).
+	 * There are 4 event select registers for the 16 hardware counters.
+	 * Event code is 8-bits and for the first 4 hardware counters,
+	 * HHA_EVENT_TYPE0 is chosen. For the next 4 hardware counters,
+	 * HHA_EVENT_TYPE1 is chosen and so on.
+	 */
+	reg = HHA_EVENT_TYPE0 + 4 * (idx / 4);
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Write event code to HHA_EVENT_TYPEx register */
+	val = readl(hha_pmu->base + reg);
+	val &= ~(HHA_EVTYPE_NONE << shift);
+	val |= (type << shift);
+	writel(val, hha_pmu->base + reg);
+}
+
+static void hisi_hha_pmu_start_counters(struct hisi_pmu *hha_pmu)
+{
+	u32 val;
+
+	/*
+	 * Set perf_enable bit in HHA_PERF_CTRL to start event
+	 * counting for all enabled counters.
+	 */
+	val = readl(hha_pmu->base + HHA_PERF_CTRL);
+	val |= HHA_PERF_CTRL_EN;
+	writel(val, hha_pmu->base + HHA_PERF_CTRL);
+}
+
+static void hisi_hha_pmu_stop_counters(struct hisi_pmu *hha_pmu)
+{
+	u32 val;
+
+	/*
+	 * Clear perf_enable bit in HHA_PERF_CTRL to stop event
+	 * counting for all enabled counters.
+	 */
+	val = readl(hha_pmu->base + HHA_PERF_CTRL);
+	val &= ~(HHA_PERF_CTRL_EN);
+	writel(val, hha_pmu->base + HHA_PERF_CTRL);
+}
+
+static void hisi_hha_pmu_enable_counter(struct hisi_pmu *hha_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index in HHA_EVENT_CTRL register */
+	val = readl(hha_pmu->base + HHA_EVENT_CTRL);
+	val |= (1 << hwc->idx);
+	writel(val, hha_pmu->base + HHA_EVENT_CTRL);
+}
+
+static void hisi_hha_pmu_disable_counter(struct hisi_pmu *hha_pmu,
+					 struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index in HHA_EVENT_CTRL register */
+	val = readl(hha_pmu->base + HHA_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, hha_pmu->base + HHA_EVENT_CTRL);
+}
+
+static void hisi_hha_pmu_enable_counter_int(struct hisi_pmu *hha_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 0 to enable interrupt */
+	val = readl(hha_pmu->base + HHA_INT_MASK);
+	val &= ~(1 << hwc->idx);
+	writel(val, hha_pmu->base + HHA_INT_MASK);
+}
+
+static void hisi_hha_pmu_disable_counter_int(struct hisi_pmu *hha_pmu,
+					     struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 1 to mask interrupt */
+	val = readl(hha_pmu->base + HHA_INT_MASK);
+	val |= (1 << hwc->idx);
+	writel(val, hha_pmu->base + HHA_INT_MASK);
+}
+
+static u32 hisi_hha_pmu_get_int_status(struct hisi_pmu *hha_pmu)
+{
+	return readl(hha_pmu->base + HHA_INT_STATUS);
+}
+
+static void hisi_hha_pmu_clear_int_status(struct hisi_pmu *hha_pmu, int idx)
+{
+	writel(1 << idx, hha_pmu->base + HHA_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_hha_pmu_acpi_match[] = {
+	{ "HISI0243", },
+	{ "HISI0244", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_hha_pmu_acpi_match);
+
+static int hisi_hha_pmu_init_data(struct platform_device *pdev,
+				  struct hisi_pmu *hha_pmu)
+{
+	unsigned long long id;
+	acpi_status status;
+
+	/*
+	 * Use SCCL_ID and UID to identify the HHA PMU, while
+	 * SCCL_ID is in MPIDR[aff2].
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &hha_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Can not read hha sccl-id!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Early versions of BIOS support _UID by mistake, so we support
+	 * both "hisilicon, idx-id" as preference, if available.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+				     &hha_pmu->index_id)) {
+		status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
+					       "_UID", NULL, &id);
+		if (ACPI_FAILURE(status)) {
+			dev_err(&pdev->dev, "Cannot read idx-id!\n");
+			return -EINVAL;
+		}
+
+		hha_pmu->index_id = id;
+	}
+	/* HHA PMUs only share the same SCCL */
+	hha_pmu->ccl_id = -1;
+
+	hha_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(hha_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for hha_pmu resource\n");
+		return PTR_ERR(hha_pmu->base);
+	}
+
+	hha_pmu->identifier = readl(hha_pmu->base + HHA_VERSION);
+
+	return 0;
+}
+
+static struct attribute *hisi_hha_pmu_v1_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	NULL,
+};
+
+static const struct attribute_group hisi_hha_pmu_v1_format_group = {
+	.name = "format",
+	.attrs = hisi_hha_pmu_v1_format_attr,
+};
+
+static struct attribute *hisi_hha_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:0-10"),
+	HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:11-21"),
+	HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:22"),
+	HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:23"),
+	NULL
+};
+
+static const struct attribute_group hisi_hha_pmu_v2_format_group = {
+	.name = "format",
+	.attrs = hisi_hha_pmu_v2_format_attr,
+};
+
+static struct attribute *hisi_hha_pmu_v1_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_ops_num,		0x00),
+	HISI_PMU_EVENT_ATTR(rx_outer,		0x01),
+	HISI_PMU_EVENT_ATTR(rx_sccl,		0x02),
+	HISI_PMU_EVENT_ATTR(rx_ccix,		0x03),
+	HISI_PMU_EVENT_ATTR(rx_wbi,		0x04),
+	HISI_PMU_EVENT_ATTR(rx_wbip,		0x05),
+	HISI_PMU_EVENT_ATTR(rx_wtistash,	0x11),
+	HISI_PMU_EVENT_ATTR(rd_ddr_64b,		0x1c),
+	HISI_PMU_EVENT_ATTR(wr_ddr_64b,		0x1d),
+	HISI_PMU_EVENT_ATTR(rd_ddr_128b,	0x1e),
+	HISI_PMU_EVENT_ATTR(wr_ddr_128b,	0x1f),
+	HISI_PMU_EVENT_ATTR(spill_num,		0x20),
+	HISI_PMU_EVENT_ATTR(spill_success,	0x21),
+	HISI_PMU_EVENT_ATTR(bi_num,		0x23),
+	HISI_PMU_EVENT_ATTR(mediated_num,	0x32),
+	HISI_PMU_EVENT_ATTR(tx_snp_num,		0x33),
+	HISI_PMU_EVENT_ATTR(tx_snp_outer,	0x34),
+	HISI_PMU_EVENT_ATTR(tx_snp_ccix,	0x35),
+	HISI_PMU_EVENT_ATTR(rx_snprspdata,	0x38),
+	HISI_PMU_EVENT_ATTR(rx_snprsp_outer,	0x3c),
+	HISI_PMU_EVENT_ATTR(sdir-lookup,	0x40),
+	HISI_PMU_EVENT_ATTR(edir-lookup,	0x41),
+	HISI_PMU_EVENT_ATTR(sdir-hit,		0x42),
+	HISI_PMU_EVENT_ATTR(edir-hit,		0x43),
+	HISI_PMU_EVENT_ATTR(sdir-home-migrate,	0x4c),
+	HISI_PMU_EVENT_ATTR(edir-home-migrate,  0x4d),
+	NULL,
+};
+
+static const struct attribute_group hisi_hha_pmu_v1_events_group = {
+	.name = "events",
+	.attrs = hisi_hha_pmu_v1_events_attr,
+};
+
+static struct attribute *hisi_hha_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_ops_num,		0x00),
+	HISI_PMU_EVENT_ATTR(rx_outer,		0x01),
+	HISI_PMU_EVENT_ATTR(rx_sccl,		0x02),
+	HISI_PMU_EVENT_ATTR(hha_retry,		0x2e),
+	HISI_PMU_EVENT_ATTR(cycles,		0x55),
+	NULL
+};
+
+static const struct attribute_group hisi_hha_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_hha_pmu_v2_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_hha_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = {
+	.attrs = hisi_hha_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_hha_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_hha_pmu_identifier_attrs[] = {
+	&hisi_hha_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_hha_pmu_identifier_group = {
+	.attrs = hisi_hha_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = {
+	&hisi_hha_pmu_v1_format_group,
+	&hisi_hha_pmu_v1_events_group,
+	&hisi_hha_pmu_cpumask_attr_group,
+	&hisi_hha_pmu_identifier_group,
+	NULL,
+};
+
+static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = {
+	&hisi_hha_pmu_v2_format_group,
+	&hisi_hha_pmu_v2_events_group,
+	&hisi_hha_pmu_cpumask_attr_group,
+	&hisi_hha_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_hha_ops = {
+	.write_evtype		= hisi_hha_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_hha_pmu_start_counters,
+	.stop_counters		= hisi_hha_pmu_stop_counters,
+	.enable_counter		= hisi_hha_pmu_enable_counter,
+	.disable_counter	= hisi_hha_pmu_disable_counter,
+	.enable_counter_int	= hisi_hha_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_hha_pmu_disable_counter_int,
+	.write_counter		= hisi_hha_pmu_write_counter,
+	.read_counter		= hisi_hha_pmu_read_counter,
+	.get_int_status		= hisi_hha_pmu_get_int_status,
+	.clear_int_status	= hisi_hha_pmu_clear_int_status,
+	.enable_filter		= hisi_hha_pmu_enable_filter,
+	.disable_filter		= hisi_hha_pmu_disable_filter,
+};
+
+static int hisi_hha_pmu_dev_probe(struct platform_device *pdev,
+				  struct hisi_pmu *hha_pmu)
+{
+	int ret;
+
+	ret = hisi_hha_pmu_init_data(pdev, hha_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(hha_pmu, pdev);
+	if (ret)
+		return ret;
+
+	if (hha_pmu->identifier >= HISI_PMU_V2) {
+		hha_pmu->counter_bits = 64;
+		hha_pmu->check_event = HHA_V2_NR_EVENT;
+		hha_pmu->pmu_events.attr_groups = hisi_hha_pmu_v2_attr_groups;
+		hha_pmu->num_counters = HHA_V2_NR_COUNTERS;
+	} else {
+		hha_pmu->counter_bits = 48;
+		hha_pmu->check_event = HHA_V1_NR_EVENT;
+		hha_pmu->pmu_events.attr_groups = hisi_hha_pmu_v1_attr_groups;
+		hha_pmu->num_counters = HHA_V1_NR_COUNTERS;
+	}
+	hha_pmu->ops = &hisi_uncore_hha_ops;
+	hha_pmu->dev = &pdev->dev;
+	hha_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_hha_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *hha_pmu;
+	char *name;
+	int ret;
+
+	hha_pmu = devm_kzalloc(&pdev->dev, sizeof(*hha_pmu), GFP_KERNEL);
+	if (!hha_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, hha_pmu);
+
+	ret = hisi_hha_pmu_dev_probe(pdev, hha_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u",
+			      hha_pmu->sccl_id, hha_pmu->index_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+				       &hha_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	hisi_pmu_init(hha_pmu, THIS_MODULE);
+
+	ret = perf_pmu_register(&hha_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(hha_pmu->dev, "HHA PMU register failed!\n");
+		cpuhp_state_remove_instance_nocalls(
+			CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node);
+	}
+
+	return ret;
+}
+
+static int hisi_hha_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *hha_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&hha_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+					    &hha_pmu->node);
+	return 0;
+}
+
+static struct platform_driver hisi_hha_pmu_driver = {
+	.driver = {
+		.name = "hisi_hha_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_hha_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_hha_pmu_probe,
+	.remove = hisi_hha_pmu_remove,
+};
+
+static int __init hisi_hha_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
+				      "AP_PERF_ARM_HISI_HHA_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("HHA PMU: Error setup hotplug, ret = %d;\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_hha_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE);
+
+	return ret;
+}
+module_init(hisi_hha_pmu_module_init);
+
+static void __exit hisi_hha_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_hha_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE);
+}
+module_exit(hisi_hha_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
new file mode 100644
index 0000000000..794dbcd19b
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC L3C uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 HiSilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ *         Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* L3C register definition */
+#define L3C_PERF_CTRL		0x0408
+#define L3C_INT_MASK		0x0800
+#define L3C_INT_STATUS		0x0808
+#define L3C_INT_CLEAR		0x080c
+#define L3C_CORE_CTRL           0x1b04
+#define L3C_TRACETAG_CTRL       0x1b20
+#define L3C_DATSRC_TYPE         0x1b48
+#define L3C_DATSRC_CTRL         0x1bf0
+#define L3C_EVENT_CTRL	        0x1c00
+#define L3C_VERSION		0x1cf0
+#define L3C_EVENT_TYPE0		0x1d00
+/*
+ * If the HW version only supports a 48-bit counter, then
+ * bits [63:48] are reserved, which are Read-As-Zero and
+ * Writes-Ignored.
+ */
+#define L3C_CNTR0_LOWER		0x1e00
+
+/* L3C has 8-counters */
+#define L3C_NR_COUNTERS		0x8
+
+#define L3C_PERF_CTRL_EN	0x10000
+#define L3C_TRACETAG_EN		BIT(31)
+#define L3C_TRACETAG_REQ_SHIFT	7
+#define L3C_TRACETAG_MARK_EN	BIT(0)
+#define L3C_TRACETAG_REQ_EN	(L3C_TRACETAG_MARK_EN | BIT(2))
+#define L3C_TRACETAG_CORE_EN	(L3C_TRACETAG_MARK_EN | BIT(3))
+#define L3C_CORE_EN		BIT(20)
+#define L3C_COER_NONE		0x0
+#define L3C_DATSRC_MASK		0xFF
+#define L3C_DATSRC_SKT_EN	BIT(23)
+#define L3C_DATSRC_NONE		0x0
+#define L3C_EVTYPE_NONE		0xff
+#define L3C_V1_NR_EVENTS	0x59
+#define L3C_V2_NR_EVENTS	0xFF
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+
+static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_req = hisi_get_tt_req(event);
+
+	if (tt_req) {
+		u32 val;
+
+		/* Set request-type for tracetag */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
+		val |= L3C_TRACETAG_REQ_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+		/* Enable request-tracetag statistics */
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val |= L3C_TRACETAG_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_req = hisi_get_tt_req(event);
+
+	if (tt_req) {
+		u32 val;
+
+		/* Clear request-type */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
+		val &= ~L3C_TRACETAG_REQ_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+		/* Disable request-tracetag statistics */
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val &= ~L3C_TRACETAG_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 reg, reg_idx, shift, val;
+	int idx = hwc->idx;
+
+	/*
+	 * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1).
+	 * There are 2 datasource ctrl register for the 8 hardware counters.
+	 * Datasrc is 8-bits and for the former 4 hardware counters,
+	 * L3C_DATSRC_TYPE0 is chosen. For the latter 4 hardware counters,
+	 * L3C_DATSRC_TYPE1 is chosen.
+	 */
+	reg = L3C_DATSRC_TYPE + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	val = readl(l3c_pmu->base + reg);
+	val &= ~(L3C_DATSRC_MASK << shift);
+	val |= ds_cfg << shift;
+	writel(val, l3c_pmu->base + reg);
+}
+
+static void hisi_l3c_pmu_config_ds(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_cfg = hisi_get_datasrc_cfg(event);
+	u32 ds_skt = hisi_get_datasrc_skt(event);
+
+	if (ds_cfg)
+		hisi_l3c_pmu_write_ds(event, ds_cfg);
+
+	if (ds_skt) {
+		u32 val;
+
+		val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+		val |= L3C_DATSRC_SKT_EN;
+		writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_cfg = hisi_get_datasrc_cfg(event);
+	u32 ds_skt = hisi_get_datasrc_skt(event);
+
+	if (ds_cfg)
+		hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE);
+
+	if (ds_skt) {
+		u32 val;
+
+		val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+		val &= ~L3C_DATSRC_SKT_EN;
+		writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 core = hisi_get_tt_core(event);
+
+	if (core) {
+		u32 val;
+
+		/* Config and enable core information */
+		writel(core, l3c_pmu->base + L3C_CORE_CTRL);
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val |= L3C_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+		/* Enable core-tracetag statistics */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val |= L3C_TRACETAG_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 core = hisi_get_tt_core(event);
+
+	if (core) {
+		u32 val;
+
+		/* Clear core information */
+		writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val &= ~L3C_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+		/* Disable core-tracetag statistics */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val &= ~L3C_TRACETAG_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_l3c_pmu_config_req_tracetag(event);
+		hisi_l3c_pmu_config_core_tracetag(event);
+		hisi_l3c_pmu_config_ds(event);
+	}
+}
+
+static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_l3c_pmu_clear_ds(event);
+		hisi_l3c_pmu_clear_core_tracetag(event);
+		hisi_l3c_pmu_clear_req_tracetag(event);
+	}
+}
+
+/*
+ * Select the counter register offset using the counter index
+ */
+static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
+{
+	return (L3C_CNTR0_LOWER + (cntr_idx * 8));
+}
+
+static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
+				     struct hw_perf_event *hwc)
+{
+	return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
+				       struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
+				      u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register(L3C_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Event code is 8-bits and for the former 4 hardware counters,
+	 * L3C_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
+	 * L3C_EVENT_TYPE1 is chosen.
+	 */
+	reg = L3C_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Write event code to L3C_EVENT_TYPEx Register */
+	val = readl(l3c_pmu->base + reg);
+	val &= ~(L3C_EVTYPE_NONE << shift);
+	val |= (type << shift);
+	writel(val, l3c_pmu->base + reg);
+}
+
+static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu)
+{
+	u32 val;
+
+	/*
+	 * Set perf_enable bit in L3C_PERF_CTRL register to start counting
+	 * for all enabled counters.
+	 */
+	val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+	val |= L3C_PERF_CTRL_EN;
+	writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+}
+
+static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu)
+{
+	u32 val;
+
+	/*
+	 * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting
+	 * for all enabled counters.
+	 */
+	val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+	val &= ~(L3C_PERF_CTRL_EN);
+	writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+}
+
+static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index in L3C_EVENT_CTRL register */
+	val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
+	val |= (1 << hwc->idx);
+	writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+}
+
+static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
+					 struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index in L3C_EVENT_CTRL register */
+	val = readl(l3c_pmu->base + L3C_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, l3c_pmu->base + L3C_EVENT_CTRL);
+}
+
+static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(l3c_pmu->base + L3C_INT_MASK);
+	/* Write 0 to enable interrupt */
+	val &= ~(1 << hwc->idx);
+	writel(val, l3c_pmu->base + L3C_INT_MASK);
+}
+
+static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
+					     struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(l3c_pmu->base + L3C_INT_MASK);
+	/* Write 1 to mask interrupt */
+	val |= (1 << hwc->idx);
+	writel(val, l3c_pmu->base + L3C_INT_MASK);
+}
+
+static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu)
+{
+	return readl(l3c_pmu->base + L3C_INT_STATUS);
+}
+
+static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
+{
+	writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
+	{ "HISI0213", },
+	{ "HISI0214", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+
+static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
+				  struct hisi_pmu *l3c_pmu)
+{
+	/*
+	 * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
+	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &l3c_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
+				     &l3c_pmu->ccl_id)) {
+		dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
+		return -EINVAL;
+	}
+
+	l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(l3c_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
+		return PTR_ERR(l3c_pmu->base);
+	}
+
+	l3c_pmu->identifier = readl(l3c_pmu->base + L3C_VERSION);
+
+	return 0;
+}
+
+static struct attribute *hisi_l3c_pmu_v1_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
+	.name = "format",
+	.attrs = hisi_l3c_pmu_v1_format_attr,
+};
+
+static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
+	HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
+	HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
+	HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
+	NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
+	.name = "format",
+	.attrs = hisi_l3c_pmu_v2_format_attr,
+};
+
+static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rd_cpipe,		0x00),
+	HISI_PMU_EVENT_ATTR(wr_cpipe,		0x01),
+	HISI_PMU_EVENT_ATTR(rd_hit_cpipe,	0x02),
+	HISI_PMU_EVENT_ATTR(wr_hit_cpipe,	0x03),
+	HISI_PMU_EVENT_ATTR(victim_num,		0x04),
+	HISI_PMU_EVENT_ATTR(rd_spipe,		0x20),
+	HISI_PMU_EVENT_ATTR(wr_spipe,		0x21),
+	HISI_PMU_EVENT_ATTR(rd_hit_spipe,	0x22),
+	HISI_PMU_EVENT_ATTR(wr_hit_spipe,	0x23),
+	HISI_PMU_EVENT_ATTR(back_invalid,	0x29),
+	HISI_PMU_EVENT_ATTR(retry_cpu,		0x40),
+	HISI_PMU_EVENT_ATTR(retry_ring,		0x41),
+	HISI_PMU_EVENT_ATTR(prefetch_drop,	0x42),
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_v1_events_group = {
+	.name = "events",
+	.attrs = hisi_l3c_pmu_v1_events_attr,
+};
+
+static struct attribute *hisi_l3c_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(l3c_hit,		0x48),
+	HISI_PMU_EVENT_ATTR(cycles,		0x7f),
+	HISI_PMU_EVENT_ATTR(l3c_ref,		0xb8),
+	HISI_PMU_EVENT_ATTR(dat_access,		0xb9),
+	NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_l3c_pmu_v2_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = {
+	.attrs = hisi_l3c_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_l3c_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_l3c_pmu_identifier_attrs[] = {
+	&hisi_l3c_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_identifier_group = {
+	.attrs = hisi_l3c_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
+	&hisi_l3c_pmu_v1_format_group,
+	&hisi_l3c_pmu_v1_events_group,
+	&hisi_l3c_pmu_cpumask_attr_group,
+	&hisi_l3c_pmu_identifier_group,
+	NULL,
+};
+
+static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
+	&hisi_l3c_pmu_v2_format_group,
+	&hisi_l3c_pmu_v2_events_group,
+	&hisi_l3c_pmu_cpumask_attr_group,
+	&hisi_l3c_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
+	.write_evtype		= hisi_l3c_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_l3c_pmu_start_counters,
+	.stop_counters		= hisi_l3c_pmu_stop_counters,
+	.enable_counter		= hisi_l3c_pmu_enable_counter,
+	.disable_counter	= hisi_l3c_pmu_disable_counter,
+	.enable_counter_int	= hisi_l3c_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_l3c_pmu_disable_counter_int,
+	.write_counter		= hisi_l3c_pmu_write_counter,
+	.read_counter		= hisi_l3c_pmu_read_counter,
+	.get_int_status		= hisi_l3c_pmu_get_int_status,
+	.clear_int_status	= hisi_l3c_pmu_clear_int_status,
+	.enable_filter		= hisi_l3c_pmu_enable_filter,
+	.disable_filter		= hisi_l3c_pmu_disable_filter,
+};
+
+static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
+				  struct hisi_pmu *l3c_pmu)
+{
+	int ret;
+
+	ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(l3c_pmu, pdev);
+	if (ret)
+		return ret;
+
+	if (l3c_pmu->identifier >= HISI_PMU_V2) {
+		l3c_pmu->counter_bits = 64;
+		l3c_pmu->check_event = L3C_V2_NR_EVENTS;
+		l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
+	} else {
+		l3c_pmu->counter_bits = 48;
+		l3c_pmu->check_event = L3C_V1_NR_EVENTS;
+		l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
+	}
+
+	l3c_pmu->num_counters = L3C_NR_COUNTERS;
+	l3c_pmu->ops = &hisi_uncore_l3c_ops;
+	l3c_pmu->dev = &pdev->dev;
+	l3c_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_l3c_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *l3c_pmu;
+	char *name;
+	int ret;
+
+	l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL);
+	if (!l3c_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, l3c_pmu);
+
+	ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
+			      l3c_pmu->sccl_id, l3c_pmu->ccl_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+				       &l3c_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	hisi_pmu_init(l3c_pmu, THIS_MODULE);
+
+	ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(l3c_pmu->dev, "L3C PMU register failed!\n");
+		cpuhp_state_remove_instance_nocalls(
+			CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node);
+	}
+
+	return ret;
+}
+
+static int hisi_l3c_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *l3c_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&l3c_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+					    &l3c_pmu->node);
+	return 0;
+}
+
+static struct platform_driver hisi_l3c_pmu_driver = {
+	.driver = {
+		.name = "hisi_l3c_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_l3c_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_l3c_pmu_probe,
+	.remove = hisi_l3c_pmu_remove,
+};
+
+static int __init hisi_l3c_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+				      "AP_PERF_ARM_HISI_L3_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_l3c_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE);
+
+	return ret;
+}
+module_init(hisi_l3c_pmu_module_init);
+
+static void __exit hisi_l3c_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_l3c_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE);
+}
+module_exit(hisi_l3c_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
new file mode 100644
index 0000000000..797cf20199
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -0,0 +1,576 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon PA uncore Hardware event counters support
+ *
+ * Copyright (C) 2020 HiSilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include <linux/acpi.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* PA register definition */
+#define PA_PERF_CTRL			0x1c00
+#define PA_EVENT_CTRL			0x1c04
+#define PA_TT_CTRL			0x1c08
+#define PA_TGTID_CTRL			0x1c14
+#define PA_SRCID_CTRL			0x1c18
+
+/* H32 PA interrupt registers */
+#define PA_INT_MASK			0x1c70
+#define PA_INT_STATUS			0x1c78
+#define PA_INT_CLEAR			0x1c7c
+
+#define H60PA_INT_STATUS		0x1c70
+#define H60PA_INT_MASK			0x1c74
+
+#define PA_EVENT_TYPE0			0x1c80
+#define PA_PMU_VERSION			0x1cf0
+#define PA_EVENT_CNT0_L			0x1d00
+
+#define PA_EVTYPE_MASK			0xff
+#define PA_NR_COUNTERS			0x8
+#define PA_PERF_CTRL_EN			BIT(0)
+#define PA_TRACETAG_EN			BIT(4)
+#define PA_TGTID_EN			BIT(11)
+#define PA_SRCID_EN			BIT(11)
+#define PA_TGTID_NONE			0
+#define PA_SRCID_NONE			0
+#define PA_TGTID_MSK_SHIFT		12
+#define PA_SRCID_MSK_SHIFT		12
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_cmd, config1, 10, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_msk, config1, 21, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44);
+
+struct hisi_pa_pmu_int_regs {
+	u32 mask_offset;
+	u32 clear_offset;
+	u32 status_offset;
+};
+
+static void hisi_pa_pmu_enable_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(pa_pmu->base + PA_TT_CTRL);
+		val |= PA_TRACETAG_EN;
+		writel(val, pa_pmu->base + PA_TT_CTRL);
+	}
+}
+
+static void hisi_pa_pmu_clear_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(pa_pmu->base + PA_TT_CTRL);
+		val &= ~PA_TRACETAG_EN;
+		writel(val, pa_pmu->base + PA_TT_CTRL);
+	}
+}
+
+static void hisi_pa_pmu_config_tgtid(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_tgtid_cmd(event);
+
+	if (cmd) {
+		u32 msk = hisi_get_tgtid_msk(event);
+		u32 val = cmd | PA_TGTID_EN | (msk << PA_TGTID_MSK_SHIFT);
+
+		writel(val, pa_pmu->base + PA_TGTID_CTRL);
+	}
+}
+
+static void hisi_pa_pmu_clear_tgtid(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_tgtid_cmd(event);
+
+	if (cmd)
+		writel(PA_TGTID_NONE, pa_pmu->base + PA_TGTID_CTRL);
+}
+
+static void hisi_pa_pmu_config_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 msk = hisi_get_srcid_msk(event);
+		u32 val = cmd | PA_SRCID_EN | (msk << PA_SRCID_MSK_SHIFT);
+
+		writel(val, pa_pmu->base + PA_SRCID_CTRL);
+	}
+}
+
+static void hisi_pa_pmu_clear_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd)
+		writel(PA_SRCID_NONE, pa_pmu->base + PA_SRCID_CTRL);
+}
+
+static void hisi_pa_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_pa_pmu_enable_tracetag(event);
+		hisi_pa_pmu_config_srcid(event);
+		hisi_pa_pmu_config_tgtid(event);
+	}
+}
+
+static void hisi_pa_pmu_disable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_pa_pmu_clear_tgtid(event);
+		hisi_pa_pmu_clear_srcid(event);
+		hisi_pa_pmu_clear_tracetag(event);
+	}
+}
+
+static u32 hisi_pa_pmu_get_counter_offset(int idx)
+{
+	return (PA_EVENT_CNT0_L + idx * 8);
+}
+
+static u64 hisi_pa_pmu_read_counter(struct hisi_pmu *pa_pmu,
+				    struct hw_perf_event *hwc)
+{
+	return readq(pa_pmu->base + hisi_pa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_pa_pmu_write_counter(struct hisi_pmu *pa_pmu,
+				      struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, pa_pmu->base + hisi_pa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_pa_pmu_write_evtype(struct hisi_pmu *pa_pmu, int idx,
+				     u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register(PA_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Event code is 8-bits and for the former 4 hardware counters,
+	 * PA_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
+	 * PA_EVENT_TYPE1 is chosen.
+	 */
+	reg = PA_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Write event code to pa_EVENT_TYPEx Register */
+	val = readl(pa_pmu->base + reg);
+	val &= ~(PA_EVTYPE_MASK << shift);
+	val |= (type << shift);
+	writel(val, pa_pmu->base + reg);
+}
+
+static void hisi_pa_pmu_start_counters(struct hisi_pmu *pa_pmu)
+{
+	u32 val;
+
+	val = readl(pa_pmu->base + PA_PERF_CTRL);
+	val |= PA_PERF_CTRL_EN;
+	writel(val, pa_pmu->base + PA_PERF_CTRL);
+}
+
+static void hisi_pa_pmu_stop_counters(struct hisi_pmu *pa_pmu)
+{
+	u32 val;
+
+	val = readl(pa_pmu->base + PA_PERF_CTRL);
+	val &= ~(PA_PERF_CTRL_EN);
+	writel(val, pa_pmu->base + PA_PERF_CTRL);
+}
+
+static void hisi_pa_pmu_enable_counter(struct hisi_pmu *pa_pmu,
+				       struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index in PA_EVENT_CTRL register */
+	val = readl(pa_pmu->base + PA_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, pa_pmu->base + PA_EVENT_CTRL);
+}
+
+static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index in PA_EVENT_CTRL register */
+	val = readl(pa_pmu->base + PA_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, pa_pmu->base + PA_EVENT_CTRL);
+}
+
+static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu,
+					   struct hw_perf_event *hwc)
+{
+	struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
+	u32 val;
+
+	/* Write 0 to enable interrupt */
+	val = readl(pa_pmu->base + regs->mask_offset);
+	val &= ~(1 << hwc->idx);
+	writel(val, pa_pmu->base + regs->mask_offset);
+}
+
+static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu,
+					    struct hw_perf_event *hwc)
+{
+	struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
+	u32 val;
+
+	/* Write 1 to mask interrupt */
+	val = readl(pa_pmu->base + regs->mask_offset);
+	val |= 1 << hwc->idx;
+	writel(val, pa_pmu->base + regs->mask_offset);
+}
+
+static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu)
+{
+	struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
+
+	return readl(pa_pmu->base + regs->status_offset);
+}
+
+static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx)
+{
+	struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private;
+
+	writel(1 << idx, pa_pmu->base + regs->clear_offset);
+}
+
+static int hisi_pa_pmu_init_data(struct platform_device *pdev,
+				   struct hisi_pmu *pa_pmu)
+{
+	/*
+	 * As PA PMU is in a SICL, use the SICL_ID and the index ID
+	 * to identify the PA PMU.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &pa_pmu->sicl_id)) {
+		dev_err(&pdev->dev, "Cannot read sicl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+				     &pa_pmu->index_id)) {
+		dev_err(&pdev->dev, "Cannot read idx-id!\n");
+		return -EINVAL;
+	}
+
+	pa_pmu->ccl_id = -1;
+	pa_pmu->sccl_id = -1;
+
+	pa_pmu->dev_info = device_get_match_data(&pdev->dev);
+	if (!pa_pmu->dev_info)
+		return -ENODEV;
+
+	pa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(pa_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n");
+		return PTR_ERR(pa_pmu->base);
+	}
+
+	pa_pmu->identifier = readl(pa_pmu->base + PA_PMU_VERSION);
+
+	return 0;
+}
+
+static struct attribute *hisi_pa_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(tgtid_cmd, "config1:0-10"),
+	HISI_PMU_FORMAT_ATTR(tgtid_msk, "config1:11-21"),
+	HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:22-32"),
+	HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:33-43"),
+	HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:44"),
+	NULL,
+};
+
+static const struct attribute_group hisi_pa_pmu_v2_format_group = {
+	.name = "format",
+	.attrs = hisi_pa_pmu_v2_format_attr,
+};
+
+static struct attribute *hisi_pa_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_req,		0x40),
+	HISI_PMU_EVENT_ATTR(tx_req,             0x5c),
+	HISI_PMU_EVENT_ATTR(cycle,		0x78),
+	NULL
+};
+
+static const struct attribute_group hisi_pa_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_pa_pmu_v2_events_attr,
+};
+
+static struct attribute *hisi_pa_pmu_v3_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(tx_req,	0x0),
+	HISI_PMU_EVENT_ATTR(tx_dat,	0x1),
+	HISI_PMU_EVENT_ATTR(tx_snp,	0x2),
+	HISI_PMU_EVENT_ATTR(rx_req,	0x7),
+	HISI_PMU_EVENT_ATTR(rx_dat,	0x8),
+	HISI_PMU_EVENT_ATTR(rx_snp,	0x9),
+	NULL
+};
+
+static const struct attribute_group hisi_pa_pmu_v3_events_group = {
+	.name = "events",
+	.attrs = hisi_pa_pmu_v3_events_attr,
+};
+
+static struct attribute *hisi_h60pa_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_flit,	0x50),
+	HISI_PMU_EVENT_ATTR(tx_flit,	0x65),
+	NULL
+};
+
+static const struct attribute_group hisi_h60pa_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_h60pa_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_pa_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = {
+	.attrs = hisi_pa_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_pa_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_pa_pmu_identifier_attrs[] = {
+	&hisi_pa_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_pa_pmu_identifier_group = {
+	.attrs = hisi_pa_pmu_identifier_attrs,
+};
+
+static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = {
+	.mask_offset = PA_INT_MASK,
+	.clear_offset = PA_INT_CLEAR,
+	.status_offset = PA_INT_STATUS,
+};
+
+static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = {
+	&hisi_pa_pmu_v2_format_group,
+	&hisi_pa_pmu_v2_events_group,
+	&hisi_pa_pmu_cpumask_attr_group,
+	&hisi_pa_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_pmu_dev_info hisi_h32pa_v2 = {
+	.name = "pa",
+	.attr_groups = hisi_pa_pmu_v2_attr_groups,
+	.private = &hisi_pa_pmu_regs,
+};
+
+static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = {
+	&hisi_pa_pmu_v2_format_group,
+	&hisi_pa_pmu_v3_events_group,
+	&hisi_pa_pmu_cpumask_attr_group,
+	&hisi_pa_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_pmu_dev_info hisi_h32pa_v3 = {
+	.name = "pa",
+	.attr_groups = hisi_pa_pmu_v3_attr_groups,
+	.private = &hisi_pa_pmu_regs,
+};
+
+static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = {
+	.mask_offset = H60PA_INT_MASK,
+	.clear_offset = H60PA_INT_STATUS, /* Clear on write */
+	.status_offset = H60PA_INT_STATUS,
+};
+
+static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = {
+	&hisi_pa_pmu_v2_format_group,
+	&hisi_h60pa_pmu_events_group,
+	&hisi_pa_pmu_cpumask_attr_group,
+	&hisi_pa_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_pmu_dev_info hisi_h60pa = {
+	.name = "h60pa",
+	.attr_groups = hisi_h60pa_pmu_attr_groups,
+	.private = &hisi_h60pa_pmu_regs,
+};
+
+static const struct hisi_uncore_ops hisi_uncore_pa_ops = {
+	.write_evtype		= hisi_pa_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_pa_pmu_start_counters,
+	.stop_counters		= hisi_pa_pmu_stop_counters,
+	.enable_counter		= hisi_pa_pmu_enable_counter,
+	.disable_counter	= hisi_pa_pmu_disable_counter,
+	.enable_counter_int	= hisi_pa_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_pa_pmu_disable_counter_int,
+	.write_counter		= hisi_pa_pmu_write_counter,
+	.read_counter		= hisi_pa_pmu_read_counter,
+	.get_int_status		= hisi_pa_pmu_get_int_status,
+	.clear_int_status	= hisi_pa_pmu_clear_int_status,
+	.enable_filter		= hisi_pa_pmu_enable_filter,
+	.disable_filter		= hisi_pa_pmu_disable_filter,
+};
+
+static int hisi_pa_pmu_dev_probe(struct platform_device *pdev,
+				 struct hisi_pmu *pa_pmu)
+{
+	int ret;
+
+	ret = hisi_pa_pmu_init_data(pdev, pa_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(pa_pmu, pdev);
+	if (ret)
+		return ret;
+
+	pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups;
+	pa_pmu->num_counters = PA_NR_COUNTERS;
+	pa_pmu->ops = &hisi_uncore_pa_ops;
+	pa_pmu->check_event = 0xB0;
+	pa_pmu->counter_bits = 64;
+	pa_pmu->dev = &pdev->dev;
+	pa_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_pa_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *pa_pmu;
+	char *name;
+	int ret;
+
+	pa_pmu = devm_kzalloc(&pdev->dev, sizeof(*pa_pmu), GFP_KERNEL);
+	if (!pa_pmu)
+		return -ENOMEM;
+
+	ret = hisi_pa_pmu_dev_probe(pdev, pa_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u",
+			      pa_pmu->sicl_id, pa_pmu->dev_info->name,
+			      pa_pmu->index_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+				       &pa_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	hisi_pmu_init(pa_pmu, THIS_MODULE);
+	ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
+		cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+						    &pa_pmu->node);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, pa_pmu);
+	return ret;
+}
+
+static int hisi_pa_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *pa_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pa_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+					    &pa_pmu->node);
+	return 0;
+}
+
+static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
+	{ "HISI0273", (kernel_ulong_t)&hisi_h32pa_v2 },
+	{ "HISI0275", (kernel_ulong_t)&hisi_h32pa_v3 },
+	{ "HISI0274", (kernel_ulong_t)&hisi_h60pa },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match);
+
+static struct platform_driver hisi_pa_pmu_driver = {
+	.driver = {
+		.name = "hisi_pa_pmu",
+		.acpi_match_table = hisi_pa_pmu_acpi_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_pa_pmu_probe,
+	.remove = hisi_pa_pmu_remove,
+};
+
+static int __init hisi_pa_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+				      "AP_PERF_ARM_HISI_PA_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("PA PMU: cpuhp state setup failed, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_pa_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE);
+
+	return ret;
+}
+module_init(hisi_pa_pmu_module_init);
+
+static void __exit hisi_pa_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_pa_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE);
+}
+module_exit(hisi_pa_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
new file mode 100644
index 0000000000..04031450d5
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC Hardware event counters support
+ *
+ * Copyright (C) 2017 HiSilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ *         Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+
+#include <asm/cputype.h>
+#include <asm/local64.h>
+
+#include "hisi_uncore_pmu.h"
+
+#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))
+
+/*
+ * PMU format attributes
+ */
+ssize_t hisi_format_sysfs_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+EXPORT_SYMBOL_GPL(hisi_format_sysfs_show);
+
+/*
+ * PMU event attributes
+ */
+ssize_t hisi_event_sysfs_show(struct device *dev,
+			      struct device_attribute *attr, char *page)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var);
+}
+EXPORT_SYMBOL_GPL(hisi_event_sysfs_show);
+
+/*
+ * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show
+ */
+ssize_t hisi_cpumask_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu);
+}
+EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show);
+
+static bool hisi_validate_event_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	/* Include count for the event */
+	int counters = 1;
+
+	if (!is_software_event(leader)) {
+		/*
+		 * We must NOT create groups containing mixed PMUs, although
+		 * software events are acceptable
+		 */
+		if (leader->pmu != event->pmu)
+			return false;
+
+		/* Increment counter for the leader */
+		if (leader != event)
+			counters++;
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (is_software_event(sibling))
+			continue;
+		if (sibling->pmu != event->pmu)
+			return false;
+		/* Increment counter for each sibling */
+		counters++;
+	}
+
+	/* The group can not count events more than the counters in the HW */
+	return counters <= hisi_pmu->num_counters;
+}
+
+int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	unsigned long *used_mask = hisi_pmu->pmu_events.used_mask;
+	u32 num_counters = hisi_pmu->num_counters;
+	int idx;
+
+	idx = find_first_zero_bit(used_mask, num_counters);
+	if (idx == num_counters)
+		return -EAGAIN;
+
+	set_bit(idx, used_mask);
+
+	return idx;
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx);
+
+ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
+					     struct device_attribute *attr,
+					     char *page)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier);
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show);
+
+static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
+{
+	clear_bit(idx, hisi_pmu->pmu_events.used_mask);
+}
+
+static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
+{
+	struct hisi_pmu *hisi_pmu = data;
+	struct perf_event *event;
+	unsigned long overflown;
+	int idx;
+
+	overflown = hisi_pmu->ops->get_int_status(hisi_pmu);
+	if (!overflown)
+		return IRQ_NONE;
+
+	/*
+	 * Find the counter index which overflowed if the bit was set
+	 * and handle it.
+	 */
+	for_each_set_bit(idx, &overflown, hisi_pmu->num_counters) {
+		/* Write 1 to clear the IRQ status flag */
+		hisi_pmu->ops->clear_int_status(hisi_pmu, idx);
+		/* Get the corresponding event struct */
+		event = hisi_pmu->pmu_events.hw_events[idx];
+		if (!event)
+			continue;
+
+		hisi_uncore_pmu_event_update(event);
+		hisi_uncore_pmu_set_event_period(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
+			     struct platform_device *pdev)
+{
+	int irq, ret;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       dev_name(&pdev->dev), hisi_pmu);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Fail to request IRQ: %d ret: %d.\n", irq, ret);
+		return ret;
+	}
+
+	hisi_pmu->irq = irq;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq);
+
+int hisi_uncore_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hisi_pmu *hisi_pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * We do not support sampling as the counters are all
+	 * shared by all CPU cores in a CPU die(SCCL). Also we
+	 * do not support attach to a task(per-process mode)
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	/*
+	 *  The uncore counters not specific to any CPU, so cannot
+	 *  support per-task
+	 */
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/*
+	 * Validate if the events in group does not exceed the
+	 * available counters in hardware.
+	 */
+	if (!hisi_validate_event_group(event))
+		return -EINVAL;
+
+	hisi_pmu = to_hisi_pmu(event->pmu);
+	if (event->attr.config > hisi_pmu->check_event)
+		return -EINVAL;
+
+	if (hisi_pmu->on_cpu == -1)
+		return -EINVAL;
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= event->attr.config;
+
+	if (hisi_pmu->ops->check_filter && hisi_pmu->ops->check_filter(event))
+		return -EINVAL;
+
+	/* Enforce to use the same CPU for all events in this PMU */
+	event->cpu = hisi_pmu->on_cpu;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init);
+
+/*
+ * Set the counter to count the event that we're interested in,
+ * and enable interrupt and counter.
+ */
+static void hisi_uncore_pmu_enable_event(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
+				    HISI_GET_EVENTID(event));
+
+	if (hisi_pmu->ops->enable_filter)
+		hisi_pmu->ops->enable_filter(event);
+
+	hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
+	hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
+}
+
+/*
+ * Disable counter and interrupt.
+ */
+static void hisi_uncore_pmu_disable_event(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
+	hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
+
+	if (hisi_pmu->ops->disable_filter)
+		hisi_pmu->ops->disable_filter(event);
+}
+
+void hisi_uncore_pmu_set_event_period(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * The HiSilicon PMU counters support 32 bits or 48 bits, depending on
+	 * the PMU. We reduce it to 2^(counter_bits - 1) to account for the
+	 * extreme interrupt latency. So we could hopefully handle the overflow
+	 * interrupt before another 2^(counter_bits - 1) events occur and the
+	 * counter overtakes its previous value.
+	 */
+	u64 val = BIT_ULL(hisi_pmu->counter_bits - 1);
+
+	local64_set(&hwc->prev_count, val);
+	/* Write start value to the hardware event counter */
+	hisi_pmu->ops->write_counter(hisi_pmu, hwc, val);
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period);
+
+void hisi_uncore_pmu_event_update(struct perf_event *event)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+	do {
+		/* Read the count from the counter register */
+		new_raw_count = hisi_pmu->ops->read_counter(hisi_pmu, hwc);
+		prev_raw_count = local64_read(&hwc->prev_count);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+				 new_raw_count) != prev_raw_count);
+	/*
+	 * compute the delta
+	 */
+	delta = (new_raw_count - prev_raw_count) &
+		HISI_MAX_PERIOD(hisi_pmu->counter_bits);
+	local64_add(delta, &event->count);
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update);
+
+void hisi_uncore_pmu_start(struct perf_event *event, int flags)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+	hisi_uncore_pmu_set_event_period(event);
+
+	if (flags & PERF_EF_RELOAD) {
+		u64 prev_raw_count =  local64_read(&hwc->prev_count);
+
+		hisi_pmu->ops->write_counter(hisi_pmu, hwc, prev_raw_count);
+	}
+
+	hisi_uncore_pmu_enable_event(event);
+	perf_event_update_userpage(event);
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start);
+
+void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_uncore_pmu_disable_event(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	/* Read hardware counter and update the perf counter statistics */
+	hisi_uncore_pmu_event_update(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop);
+
+int hisi_uncore_pmu_add(struct perf_event *event, int flags)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	/* Get an available counter index for counting */
+	idx = hisi_pmu->ops->get_event_idx(event);
+	if (idx < 0)
+		return idx;
+
+	event->hw.idx = idx;
+	hisi_pmu->pmu_events.hw_events[idx] = event;
+
+	if (flags & PERF_EF_START)
+		hisi_uncore_pmu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add);
+
+void hisi_uncore_pmu_del(struct perf_event *event, int flags)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hisi_uncore_pmu_stop(event, PERF_EF_UPDATE);
+	hisi_uncore_pmu_clear_event_idx(hisi_pmu, hwc->idx);
+	perf_event_update_userpage(event);
+	hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL;
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del);
+
+void hisi_uncore_pmu_read(struct perf_event *event)
+{
+	/* Read hardware counter and update the perf counter statistics */
+	hisi_uncore_pmu_event_update(event);
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read);
+
+void hisi_uncore_pmu_enable(struct pmu *pmu)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+	bool enabled = !bitmap_empty(hisi_pmu->pmu_events.used_mask,
+				    hisi_pmu->num_counters);
+
+	if (!enabled)
+		return;
+
+	hisi_pmu->ops->start_counters(hisi_pmu);
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable);
+
+void hisi_uncore_pmu_disable(struct pmu *pmu)
+{
+	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
+
+	hisi_pmu->ops->stop_counters(hisi_pmu);
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable);
+
+
+/*
+ * The Super CPU Cluster (SCCL) and CPU Cluster (CCL) IDs can be
+ * determined from the MPIDR_EL1, but the encoding varies by CPU:
+ *
+ * - For MT variants of TSV110:
+ *   SCCL is Aff2[7:3], CCL is Aff2[2:0]
+ *
+ * - For other MT parts:
+ *   SCCL is Aff3[7:0], CCL is Aff2[7:0]
+ *
+ * - For non-MT parts:
+ *   SCCL is Aff2[7:0], CCL is Aff1[7:0]
+ */
+static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp)
+{
+	u64 mpidr = read_cpuid_mpidr();
+	int aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3);
+	int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+	int aff1 = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	bool mt = mpidr & MPIDR_MT_BITMASK;
+	int sccl, ccl;
+
+	if (mt && read_cpuid_part_number() == HISI_CPU_PART_TSV110) {
+		sccl = aff2 >> 3;
+		ccl = aff2 & 0x7;
+	} else if (mt) {
+		sccl = aff3;
+		ccl = aff2;
+	} else {
+		sccl = aff2;
+		ccl = aff1;
+	}
+
+	if (scclp)
+		*scclp = sccl;
+	if (cclp)
+		*cclp = ccl;
+}
+
+/*
+ * Check whether the CPU is associated with this uncore PMU
+ */
+static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
+{
+	int sccl_id, ccl_id;
+
+	/* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */
+	if (hisi_pmu->sccl_id == -1)
+		return true;
+
+	if (hisi_pmu->ccl_id == -1) {
+		/* If CCL_ID is -1, the PMU only shares the same SCCL */
+		hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
+
+		return sccl_id == hisi_pmu->sccl_id;
+	}
+
+	hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
+
+	return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id;
+}
+
+int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
+						     node);
+
+	if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu))
+		return 0;
+
+	cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus);
+
+	/* If another CPU is already managing this PMU, simply return. */
+	if (hisi_pmu->on_cpu != -1)
+		return 0;
+
+	/* Use this CPU in cpumask for event counting */
+	hisi_pmu->on_cpu = cpu;
+
+	/* Overflow interrupt also should use the same CPU */
+	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu);
+
+int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
+						     node);
+	cpumask_t pmu_online_cpus;
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
+		return 0;
+
+	/* Nothing to do if this CPU doesn't own the PMU */
+	if (hisi_pmu->on_cpu != cpu)
+		return 0;
+
+	/* Give up ownership of the PMU */
+	hisi_pmu->on_cpu = -1;
+
+	/* Choose a new CPU to migrate ownership of the PMU to */
+	cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
+		    cpu_online_mask);
+	target = cpumask_any_but(&pmu_online_cpus, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
+	/* Use this CPU for event counting */
+	hisi_pmu->on_cpu = target;
+	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu);
+
+void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module)
+{
+	struct pmu *pmu = &hisi_pmu->pmu;
+
+	pmu->module             = module;
+	pmu->task_ctx_nr        = perf_invalid_context;
+	pmu->event_init         = hisi_uncore_pmu_event_init;
+	pmu->pmu_enable         = hisi_uncore_pmu_enable;
+	pmu->pmu_disable        = hisi_uncore_pmu_disable;
+	pmu->add                = hisi_uncore_pmu_add;
+	pmu->del                = hisi_uncore_pmu_del;
+	pmu->start              = hisi_uncore_pmu_start;
+	pmu->stop               = hisi_uncore_pmu_stop;
+	pmu->read               = hisi_uncore_pmu_read;
+	pmu->attr_groups        = hisi_pmu->pmu_events.attr_groups;
+	pmu->capabilities       = PERF_PMU_CAP_NO_EXCLUDE;
+}
+EXPORT_SYMBOL_GPL(hisi_pmu_init);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
new file mode 100644
index 0000000000..92402aa69d
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * HiSilicon SoC Hardware event counters support
+ *
+ * Copyright (C) 2017 HiSilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ *         Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#ifndef __HISI_UNCORE_PMU_H__
+#define __HISI_UNCORE_PMU_H__
+
+#include <linux/bitfield.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "hisi_pmu: " fmt
+
+#define HISI_PMU_V2		0x30
+#define HISI_MAX_COUNTERS 0x10
+#define to_hisi_pmu(p)	(container_of(p, struct hisi_pmu, pmu))
+
+#define HISI_PMU_ATTR(_name, _func, _config)				\
+	(&((struct dev_ext_attribute[]) {				\
+		{ __ATTR(_name, 0444, _func, NULL), (void *)_config }   \
+	})[0].attr.attr)
+
+#define HISI_PMU_FORMAT_ATTR(_name, _config)		\
+	HISI_PMU_ATTR(_name, hisi_format_sysfs_show, (void *)_config)
+#define HISI_PMU_EVENT_ATTR(_name, _config)		\
+	HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
+
+#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, config, hi, lo)        \
+	static inline u32 hisi_get_##name(struct perf_event *event)            \
+	{                                                                  \
+		return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config);  \
+	}
+
+#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
+
+#define HISI_PMU_EVTYPE_BITS		8
+#define HISI_PMU_EVTYPE_SHIFT(idx)	((idx) % 4 * HISI_PMU_EVTYPE_BITS)
+
+struct hisi_pmu;
+
+struct hisi_uncore_ops {
+	int (*check_filter)(struct perf_event *event);
+	void (*write_evtype)(struct hisi_pmu *, int, u32);
+	int (*get_event_idx)(struct perf_event *);
+	u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*write_counter)(struct hisi_pmu *, struct hw_perf_event *, u64);
+	void (*enable_counter)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*disable_counter)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*enable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*disable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
+	void (*start_counters)(struct hisi_pmu *);
+	void (*stop_counters)(struct hisi_pmu *);
+	u32 (*get_int_status)(struct hisi_pmu *hisi_pmu);
+	void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx);
+	void (*enable_filter)(struct perf_event *event);
+	void (*disable_filter)(struct perf_event *event);
+};
+
+/* Describes the HISI PMU chip features information */
+struct hisi_pmu_dev_info {
+	const char *name;
+	const struct attribute_group **attr_groups;
+	void *private;
+};
+
+struct hisi_pmu_hwevents {
+	struct perf_event *hw_events[HISI_MAX_COUNTERS];
+	DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
+	const struct attribute_group **attr_groups;
+};
+
+/* Generic pmu struct for different pmu types */
+struct hisi_pmu {
+	struct pmu pmu;
+	const struct hisi_uncore_ops *ops;
+	const struct hisi_pmu_dev_info *dev_info;
+	struct hisi_pmu_hwevents pmu_events;
+	/* associated_cpus: All CPUs associated with the PMU */
+	cpumask_t associated_cpus;
+	/* CPU used for counting */
+	int on_cpu;
+	int irq;
+	struct device *dev;
+	struct hlist_node node;
+	int sccl_id;
+	int sicl_id;
+	int ccl_id;
+	void __iomem *base;
+	/* the ID of the PMU modules */
+	u32 index_id;
+	/* For DDRC PMU v2: each DDRC has more than one DMC */
+	u32 sub_id;
+	int num_counters;
+	int counter_bits;
+	/* check event code range */
+	int check_event;
+	u32 identifier;
+};
+
+int hisi_uncore_pmu_get_event_idx(struct perf_event *event);
+void hisi_uncore_pmu_read(struct perf_event *event);
+int hisi_uncore_pmu_add(struct perf_event *event, int flags);
+void hisi_uncore_pmu_del(struct perf_event *event, int flags);
+void hisi_uncore_pmu_start(struct perf_event *event, int flags);
+void hisi_uncore_pmu_stop(struct perf_event *event, int flags);
+void hisi_uncore_pmu_set_event_period(struct perf_event *event);
+void hisi_uncore_pmu_event_update(struct perf_event *event);
+int hisi_uncore_pmu_event_init(struct perf_event *event);
+void hisi_uncore_pmu_enable(struct pmu *pmu);
+void hisi_uncore_pmu_disable(struct pmu *pmu);
+ssize_t hisi_event_sysfs_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
+ssize_t hisi_format_sysfs_show(struct device *dev,
+			       struct device_attribute *attr, char *buf);
+ssize_t hisi_cpumask_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *buf);
+int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node);
+int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
+
+ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
+					     struct device_attribute *attr,
+					     char *page);
+int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
+			     struct platform_device *pdev);
+
+void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module);
+#endif /* __HISI_UNCORE_PMU_H__ */
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
new file mode 100644
index 0000000000..e706ca5676
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SLLC uncore Hardware event counters support
+ *
+ * Copyright (C) 2020 HiSilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include <linux/acpi.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* SLLC register definition */
+#define SLLC_INT_MASK			0x0814
+#define SLLC_INT_STATUS			0x0818
+#define SLLC_INT_CLEAR			0x081c
+#define SLLC_PERF_CTRL			0x1c00
+#define SLLC_SRCID_CTRL			0x1c04
+#define SLLC_TGTID_CTRL			0x1c08
+#define SLLC_EVENT_CTRL			0x1c14
+#define SLLC_EVENT_TYPE0		0x1c18
+#define SLLC_VERSION			0x1cf0
+#define SLLC_EVENT_CNT0_L		0x1d00
+
+#define SLLC_EVTYPE_MASK		0xff
+#define SLLC_PERF_CTRL_EN		BIT(0)
+#define SLLC_FILT_EN			BIT(1)
+#define SLLC_TRACETAG_EN		BIT(2)
+#define SLLC_SRCID_EN			BIT(4)
+#define SLLC_SRCID_NONE			0x0
+#define SLLC_TGTID_EN			BIT(5)
+#define SLLC_TGTID_NONE			0x0
+#define SLLC_TGTID_MIN_SHIFT		1
+#define SLLC_TGTID_MAX_SHIFT		12
+#define SLLC_SRCID_CMD_SHIFT		1
+#define SLLC_SRCID_MSK_SHIFT		12
+#define SLLC_NR_EVENTS			0x80
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44);
+
+static bool tgtid_is_valid(u32 max, u32 min)
+{
+	return max > 0 && max >= min;
+}
+
+static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val |= SLLC_TRACETAG_EN | SLLC_FILT_EN;
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN);
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_config_tgtid(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 min = hisi_get_tgtid_min(event);
+	u32 max = hisi_get_tgtid_max(event);
+
+	if (tgtid_is_valid(max, min)) {
+		u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT);
+
+		writel(val, sllc_pmu->base + SLLC_TGTID_CTRL);
+		/* Enable the tgtid */
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val |= SLLC_TGTID_EN | SLLC_FILT_EN;
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 min = hisi_get_tgtid_min(event);
+	u32 max = hisi_get_tgtid_max(event);
+
+	if (tgtid_is_valid(max, min)) {
+		u32 val;
+
+		writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL);
+		/* Disable the tgtid */
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN);
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_config_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 val, msk;
+
+		msk = hisi_get_srcid_msk(event);
+		val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT);
+		writel(val, sllc_pmu->base + SLLC_SRCID_CTRL);
+		/* Enable the srcid */
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val |= SLLC_SRCID_EN | SLLC_FILT_EN;
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_clear_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 val;
+
+		writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL);
+		/* Disable the srcid */
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN);
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_sllc_pmu_enable_tracetag(event);
+		hisi_sllc_pmu_config_srcid(event);
+		hisi_sllc_pmu_config_tgtid(event);
+	}
+}
+
+static void hisi_sllc_pmu_clear_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_sllc_pmu_disable_tracetag(event);
+		hisi_sllc_pmu_clear_srcid(event);
+		hisi_sllc_pmu_clear_tgtid(event);
+	}
+}
+
+static u32 hisi_sllc_pmu_get_counter_offset(int idx)
+{
+	return (SLLC_EVENT_CNT0_L + idx * 8);
+}
+
+static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu,
+				      struct hw_perf_event *hwc)
+{
+	return readq(sllc_pmu->base +
+		     hisi_sllc_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu,
+					struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, sllc_pmu->base +
+	       hisi_sllc_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx,
+				       u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register(SLLC_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Event code is 8-bits and for the former 4 hardware counters,
+	 * SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
+	 * SLLC_EVENT_TYPE1 is chosen.
+	 */
+	reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Write event code to SLLC_EVENT_TYPEx Register */
+	val = readl(sllc_pmu->base + reg);
+	val &= ~(SLLC_EVTYPE_MASK << shift);
+	val |= (type << shift);
+	writel(val, sllc_pmu->base + reg);
+}
+
+static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+	val |= SLLC_PERF_CTRL_EN;
+	writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+}
+
+static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+	val &= ~(SLLC_PERF_CTRL_EN);
+	writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+}
+
+static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu,
+					 struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, sllc_pmu->base + SLLC_EVENT_CTRL);
+}
+
+static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu,
+					  struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, sllc_pmu->base + SLLC_EVENT_CTRL);
+}
+
+static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu,
+					     struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_INT_MASK);
+	/* Write 0 to enable interrupt */
+	val &= ~(1 << hwc->idx);
+	writel(val, sllc_pmu->base + SLLC_INT_MASK);
+}
+
+static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu,
+					      struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_INT_MASK);
+	/* Write 1 to mask interrupt */
+	val |= 1 << hwc->idx;
+	writel(val, sllc_pmu->base + SLLC_INT_MASK);
+}
+
+static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu)
+{
+	return readl(sllc_pmu->base + SLLC_INT_STATUS);
+}
+
+static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx)
+{
+	writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = {
+	{ "HISI0263", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match);
+
+static int hisi_sllc_pmu_init_data(struct platform_device *pdev,
+				   struct hisi_pmu *sllc_pmu)
+{
+	/*
+	 * Use the SCCL_ID and the index ID to identify the SLLC PMU,
+	 * while SCCL_ID is from MPIDR_EL1 by CPU.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &sllc_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Cannot read sccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+				     &sllc_pmu->index_id)) {
+		dev_err(&pdev->dev, "Cannot read idx-id!\n");
+		return -EINVAL;
+	}
+
+	/* SLLC PMUs only share the same SCCL */
+	sllc_pmu->ccl_id = -1;
+
+	sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(sllc_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n");
+		return PTR_ERR(sllc_pmu->base);
+	}
+
+	sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION);
+
+	return 0;
+}
+
+static struct attribute *hisi_sllc_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(tgtid_min, "config1:0-10"),
+	HISI_PMU_FORMAT_ATTR(tgtid_max, "config1:11-21"),
+	HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:22-32"),
+	HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:33-43"),
+	HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:44"),
+	NULL
+};
+
+static const struct attribute_group hisi_sllc_pmu_v2_format_group = {
+	.name = "format",
+	.attrs = hisi_sllc_pmu_v2_format_attr,
+};
+
+static struct attribute *hisi_sllc_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_req,             0x30),
+	HISI_PMU_EVENT_ATTR(rx_data,            0x31),
+	HISI_PMU_EVENT_ATTR(tx_req,             0x34),
+	HISI_PMU_EVENT_ATTR(tx_data,            0x35),
+	HISI_PMU_EVENT_ATTR(cycles,             0x09),
+	NULL
+};
+
+static const struct attribute_group hisi_sllc_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_sllc_pmu_v2_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = {
+	.attrs = hisi_sllc_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_sllc_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_sllc_pmu_identifier_attrs[] = {
+	&hisi_sllc_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_sllc_pmu_identifier_group = {
+	.attrs = hisi_sllc_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = {
+	&hisi_sllc_pmu_v2_format_group,
+	&hisi_sllc_pmu_v2_events_group,
+	&hisi_sllc_pmu_cpumask_attr_group,
+	&hisi_sllc_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_sllc_ops = {
+	.write_evtype		= hisi_sllc_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_sllc_pmu_start_counters,
+	.stop_counters		= hisi_sllc_pmu_stop_counters,
+	.enable_counter		= hisi_sllc_pmu_enable_counter,
+	.disable_counter	= hisi_sllc_pmu_disable_counter,
+	.enable_counter_int	= hisi_sllc_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_sllc_pmu_disable_counter_int,
+	.write_counter		= hisi_sllc_pmu_write_counter,
+	.read_counter		= hisi_sllc_pmu_read_counter,
+	.get_int_status		= hisi_sllc_pmu_get_int_status,
+	.clear_int_status	= hisi_sllc_pmu_clear_int_status,
+	.enable_filter		= hisi_sllc_pmu_enable_filter,
+	.disable_filter		= hisi_sllc_pmu_clear_filter,
+};
+
+static int hisi_sllc_pmu_dev_probe(struct platform_device *pdev,
+				   struct hisi_pmu *sllc_pmu)
+{
+	int ret;
+
+	ret = hisi_sllc_pmu_init_data(pdev, sllc_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(sllc_pmu, pdev);
+	if (ret)
+		return ret;
+
+	sllc_pmu->pmu_events.attr_groups = hisi_sllc_pmu_v2_attr_groups;
+	sllc_pmu->ops = &hisi_uncore_sllc_ops;
+	sllc_pmu->check_event = SLLC_NR_EVENTS;
+	sllc_pmu->counter_bits = 64;
+	sllc_pmu->num_counters = 8;
+	sllc_pmu->dev = &pdev->dev;
+	sllc_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_sllc_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *sllc_pmu;
+	char *name;
+	int ret;
+
+	sllc_pmu = devm_kzalloc(&pdev->dev, sizeof(*sllc_pmu), GFP_KERNEL);
+	if (!sllc_pmu)
+		return -ENOMEM;
+
+	ret = hisi_sllc_pmu_dev_probe(pdev, sllc_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u",
+			      sllc_pmu->sccl_id, sllc_pmu->index_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+				       &sllc_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	hisi_pmu_init(sllc_pmu, THIS_MODULE);
+
+	ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret);
+		cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+						    &sllc_pmu->node);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, sllc_pmu);
+
+	return ret;
+}
+
+static int hisi_sllc_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *sllc_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&sllc_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+					    &sllc_pmu->node);
+	return 0;
+}
+
+static struct platform_driver hisi_sllc_pmu_driver = {
+	.driver = {
+		.name = "hisi_sllc_pmu",
+		.acpi_match_table = hisi_sllc_pmu_acpi_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_sllc_pmu_probe,
+	.remove = hisi_sllc_pmu_remove,
+};
+
+static int __init hisi_sllc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+				      "AP_PERF_ARM_HISI_SLLC_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("SLLC PMU: cpuhp state setup failed, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_sllc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE);
+
+	return ret;
+}
+module_init(hisi_sllc_pmu_module_init);
+
+static void __exit hisi_sllc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_sllc_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE);
+}
+module_exit(hisi_sllc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
new file mode 100644
index 0000000000..636fb79647
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC UC (unified cache) uncore Hardware event counters support
+ *
+ * Copyright (C) 2023 HiSilicon Limited
+ *
+ * This code is based on the uncore PMUs like hisi_uncore_l3c_pmu.
+ */
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* Dynamic CPU hotplug state used by UC PMU */
+static enum cpuhp_state hisi_uc_pmu_online;
+
+/* UC register definition */
+#define HISI_UC_INT_MASK_REG		0x0800
+#define HISI_UC_INT_STS_REG		0x0808
+#define HISI_UC_INT_CLEAR_REG		0x080c
+#define HISI_UC_TRACETAG_CTRL_REG	0x1b2c
+#define HISI_UC_TRACETAG_REQ_MSK	GENMASK(9, 7)
+#define HISI_UC_TRACETAG_MARK_EN	BIT(0)
+#define HISI_UC_TRACETAG_REQ_EN		(HISI_UC_TRACETAG_MARK_EN | BIT(2))
+#define HISI_UC_TRACETAG_SRCID_EN	BIT(3)
+#define HISI_UC_SRCID_CTRL_REG		0x1b40
+#define HISI_UC_SRCID_MSK		GENMASK(14, 1)
+#define HISI_UC_EVENT_CTRL_REG		0x1c00
+#define HISI_UC_EVENT_TRACETAG_EN	BIT(29)
+#define HISI_UC_EVENT_URING_MSK		GENMASK(28, 27)
+#define HISI_UC_EVENT_GLB_EN		BIT(26)
+#define HISI_UC_VERSION_REG		0x1cf0
+#define HISI_UC_EVTYPE_REGn(n)		(0x1d00 + (n) * 4)
+#define HISI_UC_EVTYPE_MASK		GENMASK(7, 0)
+#define HISI_UC_CNTR_REGn(n)		(0x1e00 + (n) * 8)
+
+#define HISI_UC_NR_COUNTERS		0x8
+#define HISI_UC_V2_NR_EVENTS		0xFF
+#define HISI_UC_CNTR_REG_BITS		64
+
+#define HISI_UC_RD_REQ_TRACETAG		0x4
+#define HISI_UC_URING_EVENT_MIN		0x47
+#define HISI_UC_URING_EVENT_MAX		0x59
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(rd_req_en, config1, 0, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(uring_channel, config1, 5, 4);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid, config1, 19, 6);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_en, config1, 20, 20);
+
+static int hisi_uc_pmu_check_filter(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+
+	if (hisi_get_srcid_en(event) && !hisi_get_rd_req_en(event)) {
+		dev_err(uc_pmu->dev,
+			"rcid_en depends on rd_req_en being enabled!\n");
+		return -EINVAL;
+	}
+
+	if (!hisi_get_uring_channel(event))
+		return 0;
+
+	if ((HISI_GET_EVENTID(event) < HISI_UC_URING_EVENT_MIN) ||
+	    (HISI_GET_EVENTID(event) > HISI_UC_URING_EVENT_MAX))
+		dev_warn(uc_pmu->dev,
+			 "Only events: [%#x ~ %#x] support channel filtering!",
+			 HISI_UC_URING_EVENT_MIN, HISI_UC_URING_EVENT_MAX);
+
+	return 0;
+}
+
+static void hisi_uc_pmu_config_req_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	if (!hisi_get_rd_req_en(event))
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	/* The request-type has been configured */
+	if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == HISI_UC_RD_REQ_TRACETAG)
+		return;
+
+	/* Set request-type for tracetag, only read request is supported! */
+	val &= ~HISI_UC_TRACETAG_REQ_MSK;
+	val |= FIELD_PREP(HISI_UC_TRACETAG_REQ_MSK, HISI_UC_RD_REQ_TRACETAG);
+	val |= HISI_UC_TRACETAG_REQ_EN;
+	writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+}
+
+static void hisi_uc_pmu_clear_req_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	if (!hisi_get_rd_req_en(event))
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	/* Do nothing, the request-type tracetag has been cleaned up */
+	if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == 0)
+		return;
+
+	/* Clear request-type */
+	val &= ~HISI_UC_TRACETAG_REQ_MSK;
+	val &= ~HISI_UC_TRACETAG_REQ_EN;
+	writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+}
+
+static void hisi_uc_pmu_config_srcid_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	if (!hisi_get_srcid_en(event))
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	/* Do nothing, the source id has been configured */
+	if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val))
+		return;
+
+	/* Enable source id tracetag */
+	val |= HISI_UC_TRACETAG_SRCID_EN;
+	writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+	val &= ~HISI_UC_SRCID_MSK;
+	val |= FIELD_PREP(HISI_UC_SRCID_MSK, hisi_get_srcid(event));
+	writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+
+	/* Depend on request-type tracetag enabled */
+	hisi_uc_pmu_config_req_tracetag(event);
+}
+
+static void hisi_uc_pmu_clear_srcid_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	if (!hisi_get_srcid_en(event))
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	/* Do nothing, the source id has been cleaned up */
+	if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val) == 0)
+		return;
+
+	hisi_uc_pmu_clear_req_tracetag(event);
+
+	/* Disable source id tracetag */
+	val &= ~HISI_UC_TRACETAG_SRCID_EN;
+	writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG);
+
+	val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+	val &= ~HISI_UC_SRCID_MSK;
+	writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG);
+}
+
+static void hisi_uc_pmu_config_uring_channel(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 uring_channel = hisi_get_uring_channel(event);
+	u32 val;
+
+	/* Do nothing if not being set or is set explicitly to zero (default) */
+	if (uring_channel == 0)
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+
+	/* Do nothing, the uring_channel has been configured */
+	if (uring_channel == FIELD_GET(HISI_UC_EVENT_URING_MSK, val))
+		return;
+
+	val &= ~HISI_UC_EVENT_URING_MSK;
+	val |= FIELD_PREP(HISI_UC_EVENT_URING_MSK, uring_channel);
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_clear_uring_channel(struct perf_event *event)
+{
+	struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	/* Do nothing if not being set or is set explicitly to zero (default) */
+	if (hisi_get_uring_channel(event) == 0)
+		return;
+
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+
+	/* Do nothing, the uring_channel has been cleaned up */
+	if (FIELD_GET(HISI_UC_EVENT_URING_MSK, val) == 0)
+		return;
+
+	val &= ~HISI_UC_EVENT_URING_MSK;
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 == 0)
+		return;
+
+	hisi_uc_pmu_config_uring_channel(event);
+	hisi_uc_pmu_config_req_tracetag(event);
+	hisi_uc_pmu_config_srcid_tracetag(event);
+}
+
+static void hisi_uc_pmu_disable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 == 0)
+		return;
+
+	hisi_uc_pmu_clear_srcid_tracetag(event);
+	hisi_uc_pmu_clear_req_tracetag(event);
+	hisi_uc_pmu_clear_uring_channel(event);
+}
+
+static void hisi_uc_pmu_write_evtype(struct hisi_pmu *uc_pmu, int idx, u32 type)
+{
+	u32 val;
+
+	/*
+	 * Select the appropriate event select register.
+	 * There are 2 32-bit event select registers for the
+	 * 8 hardware counters, each event code is 8-bit wide.
+	 */
+	val = readl(uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4));
+	val &= ~(HISI_UC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx));
+	val |= (type << HISI_PMU_EVTYPE_SHIFT(idx));
+	writel(val, uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4));
+}
+
+static void hisi_uc_pmu_start_counters(struct hisi_pmu *uc_pmu)
+{
+	u32 val;
+
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+	val |= HISI_UC_EVENT_GLB_EN;
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_stop_counters(struct hisi_pmu *uc_pmu)
+{
+	u32 val;
+
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+	val &= ~HISI_UC_EVENT_GLB_EN;
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_enable_counter(struct hisi_pmu *uc_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index */
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+	val |= (1 << hwc->idx);
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static void hisi_uc_pmu_disable_counter(struct hisi_pmu *uc_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index */
+	val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+	val &= ~(1 << hwc->idx);
+	writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG);
+}
+
+static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu,
+				    struct hw_perf_event *hwc)
+{
+	return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx));
+}
+
+static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu,
+				      struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx));
+}
+
+static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu,
+					   struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG);
+	val &= ~(1 << hwc->idx);
+	writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG);
+}
+
+static void hisi_uc_pmu_disable_counter_int(struct hisi_pmu *uc_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG);
+	val |= (1 << hwc->idx);
+	writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG);
+}
+
+static u32 hisi_uc_pmu_get_int_status(struct hisi_pmu *uc_pmu)
+{
+	return readl(uc_pmu->base + HISI_UC_INT_STS_REG);
+}
+
+static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx)
+{
+	writel(1 << idx, uc_pmu->base + HISI_UC_INT_CLEAR_REG);
+}
+
+static int hisi_uc_pmu_init_data(struct platform_device *pdev,
+				 struct hisi_pmu *uc_pmu)
+{
+	/*
+	 * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to
+	 * identify the topology information of UC PMU devices in the chip.
+	 * They have some CCLs per SCCL and then 4 UC PMU per CCL.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &uc_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Can not read uc sccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
+				     &uc_pmu->ccl_id)) {
+		dev_err(&pdev->dev, "Can not read uc ccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id",
+				     &uc_pmu->sub_id)) {
+		dev_err(&pdev->dev, "Can not read sub-id!\n");
+		return -EINVAL;
+	}
+
+	uc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(uc_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for uc_pmu resource\n");
+		return PTR_ERR(uc_pmu->base);
+	}
+
+	uc_pmu->identifier = readl(uc_pmu->base + HISI_UC_VERSION_REG);
+
+	return 0;
+}
+
+static struct attribute *hisi_uc_pmu_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(rd_req_en, "config1:0-0"),
+	HISI_PMU_FORMAT_ATTR(uring_channel, "config1:4-5"),
+	HISI_PMU_FORMAT_ATTR(srcid, "config1:6-19"),
+	HISI_PMU_FORMAT_ATTR(srcid_en, "config1:20-20"),
+	NULL
+};
+
+static const struct attribute_group hisi_uc_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_uc_pmu_format_attr,
+};
+
+static struct attribute *hisi_uc_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(sq_time,		0x00),
+	HISI_PMU_EVENT_ATTR(pq_time,		0x01),
+	HISI_PMU_EVENT_ATTR(hbm_time,		0x02),
+	HISI_PMU_EVENT_ATTR(iq_comp_time_cring,	0x03),
+	HISI_PMU_EVENT_ATTR(iq_comp_time_uring,	0x05),
+	HISI_PMU_EVENT_ATTR(cpu_rd,		0x10),
+	HISI_PMU_EVENT_ATTR(cpu_rd64,		0x17),
+	HISI_PMU_EVENT_ATTR(cpu_rs64,		0x19),
+	HISI_PMU_EVENT_ATTR(cpu_mru,		0x1c),
+	HISI_PMU_EVENT_ATTR(cycles,		0x95),
+	HISI_PMU_EVENT_ATTR(spipe_hit,		0xb3),
+	HISI_PMU_EVENT_ATTR(hpipe_hit,		0xdb),
+	HISI_PMU_EVENT_ATTR(cring_rxdat_cnt,	0xfa),
+	HISI_PMU_EVENT_ATTR(cring_txdat_cnt,	0xfb),
+	HISI_PMU_EVENT_ATTR(uring_rxdat_cnt,	0xfc),
+	HISI_PMU_EVENT_ATTR(uring_txdat_cnt,	0xfd),
+	NULL
+};
+
+static const struct attribute_group hisi_uc_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_uc_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_uc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = {
+	.attrs = hisi_uc_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_uc_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_uc_pmu_identifier_attrs[] = {
+	&hisi_uc_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_uc_pmu_identifier_group = {
+	.attrs = hisi_uc_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_uc_pmu_attr_groups[] = {
+	&hisi_uc_pmu_format_group,
+	&hisi_uc_pmu_events_group,
+	&hisi_uc_pmu_cpumask_attr_group,
+	&hisi_uc_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_uc_pmu_ops = {
+	.check_filter		= hisi_uc_pmu_check_filter,
+	.write_evtype		= hisi_uc_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_uc_pmu_start_counters,
+	.stop_counters		= hisi_uc_pmu_stop_counters,
+	.enable_counter		= hisi_uc_pmu_enable_counter,
+	.disable_counter	= hisi_uc_pmu_disable_counter,
+	.enable_counter_int	= hisi_uc_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_uc_pmu_disable_counter_int,
+	.write_counter		= hisi_uc_pmu_write_counter,
+	.read_counter		= hisi_uc_pmu_read_counter,
+	.get_int_status		= hisi_uc_pmu_get_int_status,
+	.clear_int_status	= hisi_uc_pmu_clear_int_status,
+	.enable_filter		= hisi_uc_pmu_enable_filter,
+	.disable_filter		= hisi_uc_pmu_disable_filter,
+};
+
+static int hisi_uc_pmu_dev_probe(struct platform_device *pdev,
+				 struct hisi_pmu *uc_pmu)
+{
+	int ret;
+
+	ret = hisi_uc_pmu_init_data(pdev, uc_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(uc_pmu, pdev);
+	if (ret)
+		return ret;
+
+	uc_pmu->pmu_events.attr_groups = hisi_uc_pmu_attr_groups;
+	uc_pmu->check_event = HISI_UC_EVTYPE_MASK;
+	uc_pmu->ops = &hisi_uncore_uc_pmu_ops;
+	uc_pmu->counter_bits = HISI_UC_CNTR_REG_BITS;
+	uc_pmu->num_counters = HISI_UC_NR_COUNTERS;
+	uc_pmu->dev = &pdev->dev;
+	uc_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static void hisi_uc_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+	cpuhp_state_remove_instance_nocalls(hisi_uc_pmu_online, hotplug_node);
+}
+
+static void hisi_uc_pmu_unregister_pmu(void *pmu)
+{
+	perf_pmu_unregister(pmu);
+}
+
+static int hisi_uc_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *uc_pmu;
+	char *name;
+	int ret;
+
+	uc_pmu = devm_kzalloc(&pdev->dev, sizeof(*uc_pmu), GFP_KERNEL);
+	if (!uc_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, uc_pmu);
+
+	ret = hisi_uc_pmu_dev_probe(pdev, uc_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u",
+			      uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(hisi_uc_pmu_online, &uc_pmu->node);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "Error registering hotplug\n");
+
+	ret = devm_add_action_or_reset(&pdev->dev,
+				       hisi_uc_pmu_remove_cpuhp_instance,
+				       &uc_pmu->node);
+	if (ret)
+		return ret;
+
+	hisi_pmu_init(uc_pmu, THIS_MODULE);
+
+	ret = perf_pmu_register(&uc_pmu->pmu, name, -1);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(&pdev->dev,
+					hisi_uc_pmu_unregister_pmu,
+					&uc_pmu->pmu);
+}
+
+static const struct acpi_device_id hisi_uc_pmu_acpi_match[] = {
+	{ "HISI0291", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_uc_pmu_acpi_match);
+
+static struct platform_driver hisi_uc_pmu_driver = {
+	.driver = {
+		.name = "hisi_uc_pmu",
+		.acpi_match_table = hisi_uc_pmu_acpi_match,
+		/*
+		 * We have not worked out a safe bind/unbind process,
+		 * Forcefully unbinding during sampling will lead to a
+		 * kernel panic, so this is not supported yet.
+		 */
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_uc_pmu_probe,
+};
+
+static int __init hisi_uc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/hisi/uc:online",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret < 0) {
+		pr_err("UC PMU: Error setup hotplug, ret = %d\n", ret);
+		return ret;
+	}
+	hisi_uc_pmu_online = ret;
+
+	ret = platform_driver_register(&hisi_uc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(hisi_uc_pmu_online);
+
+	return ret;
+}
+module_init(hisi_uc_pmu_module_init);
+
+static void __exit hisi_uc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_uc_pmu_driver);
+	cpuhp_remove_multi_state(hisi_uc_pmu_online);
+}
+module_exit(hisi_uc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>");
diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c
new file mode 100644
index 0000000000..16869bf5bf
--- /dev/null
+++ b/drivers/perf/hisilicon/hns3_pmu.c
@@ -0,0 +1,1671 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This driver adds support for HNS3 PMU iEP device. Related perf events are
+ * bandwidth, latency, packet rate, interrupt rate etc.
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ */
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci-epf.h>
+#include <linux/perf_event.h>
+#include <linux/smp.h>
+
+/* registers offset address */
+#define HNS3_PMU_REG_GLOBAL_CTRL		0x0000
+#define HNS3_PMU_REG_CLOCK_FREQ			0x0020
+#define HNS3_PMU_REG_BDF			0x0fe0
+#define HNS3_PMU_REG_VERSION			0x0fe4
+#define HNS3_PMU_REG_DEVICE_ID			0x0fe8
+
+#define HNS3_PMU_REG_EVENT_OFFSET		0x1000
+#define HNS3_PMU_REG_EVENT_SIZE			0x1000
+#define HNS3_PMU_REG_EVENT_CTRL_LOW		0x00
+#define HNS3_PMU_REG_EVENT_CTRL_HIGH		0x04
+#define HNS3_PMU_REG_EVENT_INTR_STATUS		0x08
+#define HNS3_PMU_REG_EVENT_INTR_MASK		0x0c
+#define HNS3_PMU_REG_EVENT_COUNTER		0x10
+#define HNS3_PMU_REG_EVENT_EXT_COUNTER		0x18
+#define HNS3_PMU_REG_EVENT_QID_CTRL		0x28
+#define HNS3_PMU_REG_EVENT_QID_PARA		0x2c
+
+#define HNS3_PMU_FILTER_SUPPORT_GLOBAL		BIT(0)
+#define HNS3_PMU_FILTER_SUPPORT_PORT		BIT(1)
+#define HNS3_PMU_FILTER_SUPPORT_PORT_TC		BIT(2)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC		BIT(3)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE	BIT(4)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC_INTR	BIT(5)
+
+#define HNS3_PMU_FILTER_ALL_TC			0xf
+#define HNS3_PMU_FILTER_ALL_QUEUE		0xffff
+
+#define HNS3_PMU_CTRL_SUBEVENT_S		4
+#define HNS3_PMU_CTRL_FILTER_MODE_S		24
+
+#define HNS3_PMU_GLOBAL_START			BIT(0)
+
+#define HNS3_PMU_EVENT_STATUS_RESET		BIT(11)
+#define HNS3_PMU_EVENT_EN			BIT(12)
+#define HNS3_PMU_EVENT_OVERFLOW_RESTART		BIT(15)
+
+#define HNS3_PMU_QID_PARA_FUNC_S		0
+#define HNS3_PMU_QID_PARA_QUEUE_S		16
+
+#define HNS3_PMU_QID_CTRL_REQ_ENABLE		BIT(0)
+#define HNS3_PMU_QID_CTRL_DONE			BIT(1)
+#define HNS3_PMU_QID_CTRL_MISS			BIT(2)
+
+#define HNS3_PMU_INTR_MASK_OVERFLOW		BIT(1)
+
+#define HNS3_PMU_MAX_HW_EVENTS			8
+
+/*
+ * Each hardware event contains two registers (counter and ext_counter) for
+ * bandwidth, packet rate, latency and interrupt rate. These two registers will
+ * be triggered to run at the same when a hardware event is enabled. The meaning
+ * of counter and ext_counter of different event type are different, their
+ * meaning show as follow:
+ *
+ * +----------------+------------------+---------------+
+ * |   event type   |     counter      |  ext_counter  |
+ * +----------------+------------------+---------------+
+ * | bandwidth      | byte number      | cycle number  |
+ * +----------------+------------------+---------------+
+ * | packet rate    | packet number    | cycle number  |
+ * +----------------+------------------+---------------+
+ * | latency        | cycle number     | packet number |
+ * +----------------+------------------+---------------+
+ * | interrupt rate | interrupt number | cycle number  |
+ * +----------------+------------------+---------------+
+ *
+ * The cycle number indicates increment of counter of hardware timer, the
+ * frequency of hardware timer can be read from hw_clk_freq file.
+ *
+ * Performance of each hardware event is calculated by: counter / ext_counter.
+ *
+ * Since processing of data is preferred to be done in userspace, we expose
+ * ext_counter as a separate event for userspace and use bit 16 to indicate it.
+ * For example, event 0x00001 and 0x10001 are actually one event for hardware
+ * because bit 0-15 are same. If the bit 16 of one event is 0 means to read
+ * counter register, otherwise means to read ext_counter register.
+ */
+/* bandwidth events */
+#define HNS3_PMU_EVT_BW_SSU_EGU_BYTE_NUM		0x00001
+#define HNS3_PMU_EVT_BW_SSU_EGU_TIME			0x10001
+#define HNS3_PMU_EVT_BW_SSU_RPU_BYTE_NUM		0x00002
+#define HNS3_PMU_EVT_BW_SSU_RPU_TIME			0x10002
+#define HNS3_PMU_EVT_BW_SSU_ROCE_BYTE_NUM		0x00003
+#define HNS3_PMU_EVT_BW_SSU_ROCE_TIME			0x10003
+#define HNS3_PMU_EVT_BW_ROCE_SSU_BYTE_NUM		0x00004
+#define HNS3_PMU_EVT_BW_ROCE_SSU_TIME			0x10004
+#define HNS3_PMU_EVT_BW_TPU_SSU_BYTE_NUM		0x00005
+#define HNS3_PMU_EVT_BW_TPU_SSU_TIME			0x10005
+#define HNS3_PMU_EVT_BW_RPU_RCBRX_BYTE_NUM		0x00006
+#define HNS3_PMU_EVT_BW_RPU_RCBRX_TIME			0x10006
+#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_BYTE_NUM		0x00008
+#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_TIME		0x10008
+#define HNS3_PMU_EVT_BW_WR_FBD_BYTE_NUM			0x00009
+#define HNS3_PMU_EVT_BW_WR_FBD_TIME			0x10009
+#define HNS3_PMU_EVT_BW_WR_EBD_BYTE_NUM			0x0000a
+#define HNS3_PMU_EVT_BW_WR_EBD_TIME			0x1000a
+#define HNS3_PMU_EVT_BW_RD_FBD_BYTE_NUM			0x0000b
+#define HNS3_PMU_EVT_BW_RD_FBD_TIME			0x1000b
+#define HNS3_PMU_EVT_BW_RD_EBD_BYTE_NUM			0x0000c
+#define HNS3_PMU_EVT_BW_RD_EBD_TIME			0x1000c
+#define HNS3_PMU_EVT_BW_RD_PAY_M0_BYTE_NUM		0x0000d
+#define HNS3_PMU_EVT_BW_RD_PAY_M0_TIME			0x1000d
+#define HNS3_PMU_EVT_BW_RD_PAY_M1_BYTE_NUM		0x0000e
+#define HNS3_PMU_EVT_BW_RD_PAY_M1_TIME			0x1000e
+#define HNS3_PMU_EVT_BW_WR_PAY_M0_BYTE_NUM		0x0000f
+#define HNS3_PMU_EVT_BW_WR_PAY_M0_TIME			0x1000f
+#define HNS3_PMU_EVT_BW_WR_PAY_M1_BYTE_NUM		0x00010
+#define HNS3_PMU_EVT_BW_WR_PAY_M1_TIME			0x10010
+
+/* packet rate events */
+#define HNS3_PMU_EVT_PPS_IGU_SSU_PACKET_NUM		0x00100
+#define HNS3_PMU_EVT_PPS_IGU_SSU_TIME			0x10100
+#define HNS3_PMU_EVT_PPS_SSU_EGU_PACKET_NUM		0x00101
+#define HNS3_PMU_EVT_PPS_SSU_EGU_TIME			0x10101
+#define HNS3_PMU_EVT_PPS_SSU_RPU_PACKET_NUM		0x00102
+#define HNS3_PMU_EVT_PPS_SSU_RPU_TIME			0x10102
+#define HNS3_PMU_EVT_PPS_SSU_ROCE_PACKET_NUM		0x00103
+#define HNS3_PMU_EVT_PPS_SSU_ROCE_TIME			0x10103
+#define HNS3_PMU_EVT_PPS_ROCE_SSU_PACKET_NUM		0x00104
+#define HNS3_PMU_EVT_PPS_ROCE_SSU_TIME			0x10104
+#define HNS3_PMU_EVT_PPS_TPU_SSU_PACKET_NUM		0x00105
+#define HNS3_PMU_EVT_PPS_TPU_SSU_TIME			0x10105
+#define HNS3_PMU_EVT_PPS_RPU_RCBRX_PACKET_NUM		0x00106
+#define HNS3_PMU_EVT_PPS_RPU_RCBRX_TIME			0x10106
+#define HNS3_PMU_EVT_PPS_RCBTX_TPU_PACKET_NUM		0x00107
+#define HNS3_PMU_EVT_PPS_RCBTX_TPU_TIME			0x10107
+#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_PACKET_NUM		0x00108
+#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_TIME		0x10108
+#define HNS3_PMU_EVT_PPS_WR_FBD_PACKET_NUM		0x00109
+#define HNS3_PMU_EVT_PPS_WR_FBD_TIME			0x10109
+#define HNS3_PMU_EVT_PPS_WR_EBD_PACKET_NUM		0x0010a
+#define HNS3_PMU_EVT_PPS_WR_EBD_TIME			0x1010a
+#define HNS3_PMU_EVT_PPS_RD_FBD_PACKET_NUM		0x0010b
+#define HNS3_PMU_EVT_PPS_RD_FBD_TIME			0x1010b
+#define HNS3_PMU_EVT_PPS_RD_EBD_PACKET_NUM		0x0010c
+#define HNS3_PMU_EVT_PPS_RD_EBD_TIME			0x1010c
+#define HNS3_PMU_EVT_PPS_RD_PAY_M0_PACKET_NUM		0x0010d
+#define HNS3_PMU_EVT_PPS_RD_PAY_M0_TIME			0x1010d
+#define HNS3_PMU_EVT_PPS_RD_PAY_M1_PACKET_NUM		0x0010e
+#define HNS3_PMU_EVT_PPS_RD_PAY_M1_TIME			0x1010e
+#define HNS3_PMU_EVT_PPS_WR_PAY_M0_PACKET_NUM		0x0010f
+#define HNS3_PMU_EVT_PPS_WR_PAY_M0_TIME			0x1010f
+#define HNS3_PMU_EVT_PPS_WR_PAY_M1_PACKET_NUM		0x00110
+#define HNS3_PMU_EVT_PPS_WR_PAY_M1_TIME			0x10110
+#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_PACKET_NUM	0x00111
+#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_TIME		0x10111
+#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_PACKET_NUM	0x00112
+#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_TIME		0x10112
+
+/* latency events */
+#define HNS3_PMU_EVT_DLY_TX_PUSH_TIME			0x00202
+#define HNS3_PMU_EVT_DLY_TX_PUSH_PACKET_NUM		0x10202
+#define HNS3_PMU_EVT_DLY_TX_TIME			0x00204
+#define HNS3_PMU_EVT_DLY_TX_PACKET_NUM			0x10204
+#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_TIME		0x00206
+#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_PACKET_NUM		0x10206
+#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_TIME		0x00207
+#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_PACKET_NUM		0x10207
+#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_TIME		0x00208
+#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_PACKET_NUM		0x10208
+#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_TIME		0x00209
+#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_PACKET_NUM		0x10209
+#define HNS3_PMU_EVT_DLY_RPU_TIME			0x0020e
+#define HNS3_PMU_EVT_DLY_RPU_PACKET_NUM			0x1020e
+#define HNS3_PMU_EVT_DLY_TPU_TIME			0x0020f
+#define HNS3_PMU_EVT_DLY_TPU_PACKET_NUM			0x1020f
+#define HNS3_PMU_EVT_DLY_RPE_TIME			0x00210
+#define HNS3_PMU_EVT_DLY_RPE_PACKET_NUM			0x10210
+#define HNS3_PMU_EVT_DLY_TPE_TIME			0x00211
+#define HNS3_PMU_EVT_DLY_TPE_PACKET_NUM			0x10211
+#define HNS3_PMU_EVT_DLY_TPE_PUSH_TIME			0x00212
+#define HNS3_PMU_EVT_DLY_TPE_PUSH_PACKET_NUM		0x10212
+#define HNS3_PMU_EVT_DLY_WR_FBD_TIME			0x00213
+#define HNS3_PMU_EVT_DLY_WR_FBD_PACKET_NUM		0x10213
+#define HNS3_PMU_EVT_DLY_WR_EBD_TIME			0x00214
+#define HNS3_PMU_EVT_DLY_WR_EBD_PACKET_NUM		0x10214
+#define HNS3_PMU_EVT_DLY_RD_FBD_TIME			0x00215
+#define HNS3_PMU_EVT_DLY_RD_FBD_PACKET_NUM		0x10215
+#define HNS3_PMU_EVT_DLY_RD_EBD_TIME			0x00216
+#define HNS3_PMU_EVT_DLY_RD_EBD_PACKET_NUM		0x10216
+#define HNS3_PMU_EVT_DLY_RD_PAY_M0_TIME			0x00217
+#define HNS3_PMU_EVT_DLY_RD_PAY_M0_PACKET_NUM		0x10217
+#define HNS3_PMU_EVT_DLY_RD_PAY_M1_TIME			0x00218
+#define HNS3_PMU_EVT_DLY_RD_PAY_M1_PACKET_NUM		0x10218
+#define HNS3_PMU_EVT_DLY_WR_PAY_M0_TIME			0x00219
+#define HNS3_PMU_EVT_DLY_WR_PAY_M0_PACKET_NUM		0x10219
+#define HNS3_PMU_EVT_DLY_WR_PAY_M1_TIME			0x0021a
+#define HNS3_PMU_EVT_DLY_WR_PAY_M1_PACKET_NUM		0x1021a
+#define HNS3_PMU_EVT_DLY_MSIX_WRITE_TIME		0x0021c
+#define HNS3_PMU_EVT_DLY_MSIX_WRITE_PACKET_NUM		0x1021c
+
+/* interrupt rate events */
+#define HNS3_PMU_EVT_PPS_MSIX_NIC_INTR_NUM		0x00300
+#define HNS3_PMU_EVT_PPS_MSIX_NIC_TIME			0x10300
+
+/* filter mode supported by each bandwidth event */
+#define HNS3_PMU_FILTER_BW_SSU_EGU		0x07
+#define HNS3_PMU_FILTER_BW_SSU_RPU		0x1f
+#define HNS3_PMU_FILTER_BW_SSU_ROCE		0x0f
+#define HNS3_PMU_FILTER_BW_ROCE_SSU		0x0f
+#define HNS3_PMU_FILTER_BW_TPU_SSU		0x1f
+#define HNS3_PMU_FILTER_BW_RPU_RCBRX		0x11
+#define HNS3_PMU_FILTER_BW_RCBTX_TXSCH		0x11
+#define HNS3_PMU_FILTER_BW_WR_FBD		0x1b
+#define HNS3_PMU_FILTER_BW_WR_EBD		0x11
+#define HNS3_PMU_FILTER_BW_RD_FBD		0x01
+#define HNS3_PMU_FILTER_BW_RD_EBD		0x1b
+#define HNS3_PMU_FILTER_BW_RD_PAY_M0		0x01
+#define HNS3_PMU_FILTER_BW_RD_PAY_M1		0x01
+#define HNS3_PMU_FILTER_BW_WR_PAY_M0		0x01
+#define HNS3_PMU_FILTER_BW_WR_PAY_M1		0x01
+
+/* filter mode supported by each packet rate event */
+#define HNS3_PMU_FILTER_PPS_IGU_SSU		0x07
+#define HNS3_PMU_FILTER_PPS_SSU_EGU		0x07
+#define HNS3_PMU_FILTER_PPS_SSU_RPU		0x1f
+#define HNS3_PMU_FILTER_PPS_SSU_ROCE		0x0f
+#define HNS3_PMU_FILTER_PPS_ROCE_SSU		0x0f
+#define HNS3_PMU_FILTER_PPS_TPU_SSU		0x1f
+#define HNS3_PMU_FILTER_PPS_RPU_RCBRX		0x11
+#define HNS3_PMU_FILTER_PPS_RCBTX_TPU		0x1f
+#define HNS3_PMU_FILTER_PPS_RCBTX_TXSCH		0x11
+#define HNS3_PMU_FILTER_PPS_WR_FBD		0x1b
+#define HNS3_PMU_FILTER_PPS_WR_EBD		0x11
+#define HNS3_PMU_FILTER_PPS_RD_FBD		0x01
+#define HNS3_PMU_FILTER_PPS_RD_EBD		0x1b
+#define HNS3_PMU_FILTER_PPS_RD_PAY_M0		0x01
+#define HNS3_PMU_FILTER_PPS_RD_PAY_M1		0x01
+#define HNS3_PMU_FILTER_PPS_WR_PAY_M0		0x01
+#define HNS3_PMU_FILTER_PPS_WR_PAY_M1		0x01
+#define HNS3_PMU_FILTER_PPS_NICROH_TX_PRE	0x01
+#define HNS3_PMU_FILTER_PPS_NICROH_RX_PRE	0x01
+
+/* filter mode supported by each latency event */
+#define HNS3_PMU_FILTER_DLY_TX_PUSH		0x01
+#define HNS3_PMU_FILTER_DLY_TX			0x01
+#define HNS3_PMU_FILTER_DLY_SSU_TX_NIC		0x07
+#define HNS3_PMU_FILTER_DLY_SSU_TX_ROCE		0x07
+#define HNS3_PMU_FILTER_DLY_SSU_RX_NIC		0x07
+#define HNS3_PMU_FILTER_DLY_SSU_RX_ROCE		0x07
+#define HNS3_PMU_FILTER_DLY_RPU			0x11
+#define HNS3_PMU_FILTER_DLY_TPU			0x1f
+#define HNS3_PMU_FILTER_DLY_RPE			0x01
+#define HNS3_PMU_FILTER_DLY_TPE			0x0b
+#define HNS3_PMU_FILTER_DLY_TPE_PUSH		0x1b
+#define HNS3_PMU_FILTER_DLY_WR_FBD		0x1b
+#define HNS3_PMU_FILTER_DLY_WR_EBD		0x11
+#define HNS3_PMU_FILTER_DLY_RD_FBD		0x01
+#define HNS3_PMU_FILTER_DLY_RD_EBD		0x1b
+#define HNS3_PMU_FILTER_DLY_RD_PAY_M0		0x01
+#define HNS3_PMU_FILTER_DLY_RD_PAY_M1		0x01
+#define HNS3_PMU_FILTER_DLY_WR_PAY_M0		0x01
+#define HNS3_PMU_FILTER_DLY_WR_PAY_M1		0x01
+#define HNS3_PMU_FILTER_DLY_MSIX_WRITE		0x01
+
+/* filter mode supported by each interrupt rate event */
+#define HNS3_PMU_FILTER_INTR_MSIX_NIC		0x01
+
+enum hns3_pmu_hw_filter_mode {
+	HNS3_PMU_HW_FILTER_GLOBAL,
+	HNS3_PMU_HW_FILTER_PORT,
+	HNS3_PMU_HW_FILTER_PORT_TC,
+	HNS3_PMU_HW_FILTER_FUNC,
+	HNS3_PMU_HW_FILTER_FUNC_QUEUE,
+	HNS3_PMU_HW_FILTER_FUNC_INTR,
+};
+
+struct hns3_pmu_event_attr {
+	u32 event;
+	u16 filter_support;
+};
+
+struct hns3_pmu {
+	struct perf_event *hw_events[HNS3_PMU_MAX_HW_EVENTS];
+	struct hlist_node node;
+	struct pci_dev *pdev;
+	struct pmu pmu;
+	void __iomem *base;
+	int irq;
+	int on_cpu;
+	u32 identifier;
+	u32 hw_clk_freq; /* hardware clock frequency of PMU */
+	/* maximum and minimum bdf allowed by PMU */
+	u16 bdf_min;
+	u16 bdf_max;
+};
+
+#define to_hns3_pmu(p)  (container_of((p), struct hns3_pmu, pmu))
+
+#define GET_PCI_DEVFN(bdf)  ((bdf) & 0xff)
+
+#define FILTER_CONDITION_PORT(port) ((1 << (port)) & 0xff)
+#define FILTER_CONDITION_PORT_TC(port, tc) (((port) << 3) | ((tc) & 0x07))
+#define FILTER_CONDITION_FUNC_INTR(func, intr) (((intr) << 8) | (func))
+
+#define HNS3_PMU_FILTER_ATTR(_name, _config, _start, _end)               \
+	static inline u64 hns3_pmu_get_##_name(struct perf_event *event) \
+	{                                                                \
+		return FIELD_GET(GENMASK_ULL(_end, _start),              \
+				 event->attr._config);                   \
+	}
+
+HNS3_PMU_FILTER_ATTR(subevent, config, 0, 7);
+HNS3_PMU_FILTER_ATTR(event_type, config, 8, 15);
+HNS3_PMU_FILTER_ATTR(ext_counter_used, config, 16, 16);
+HNS3_PMU_FILTER_ATTR(port, config1, 0, 3);
+HNS3_PMU_FILTER_ATTR(tc, config1, 4, 7);
+HNS3_PMU_FILTER_ATTR(bdf, config1, 8, 23);
+HNS3_PMU_FILTER_ATTR(queue, config1, 24, 39);
+HNS3_PMU_FILTER_ATTR(intr, config1, 40, 51);
+HNS3_PMU_FILTER_ATTR(global, config1, 52, 52);
+
+#define HNS3_BW_EVT_BYTE_NUM(_name)	(&(struct hns3_pmu_event_attr) {\
+	HNS3_PMU_EVT_BW_##_name##_BYTE_NUM,				\
+	HNS3_PMU_FILTER_BW_##_name})
+#define HNS3_BW_EVT_TIME(_name)		(&(struct hns3_pmu_event_attr) {\
+	HNS3_PMU_EVT_BW_##_name##_TIME,					\
+	HNS3_PMU_FILTER_BW_##_name})
+#define HNS3_PPS_EVT_PACKET_NUM(_name)	(&(struct hns3_pmu_event_attr) {\
+	HNS3_PMU_EVT_PPS_##_name##_PACKET_NUM,				\
+	HNS3_PMU_FILTER_PPS_##_name})
+#define HNS3_PPS_EVT_TIME(_name)	(&(struct hns3_pmu_event_attr) {\
+	HNS3_PMU_EVT_PPS_##_name##_TIME,				\
+	HNS3_PMU_FILTER_PPS_##_name})
+#define HNS3_DLY_EVT_TIME(_name)	(&(struct hns3_pmu_event_attr) {\
+	HNS3_PMU_EVT_DLY_##_name##_TIME,				\
+	HNS3_PMU_FILTER_DLY_##_name})
+#define HNS3_DLY_EVT_PACKET_NUM(_name)	(&(struct hns3_pmu_event_attr) {\
+	HNS3_PMU_EVT_DLY_##_name##_PACKET_NUM,				\
+	HNS3_PMU_FILTER_DLY_##_name})
+#define HNS3_INTR_EVT_INTR_NUM(_name)	(&(struct hns3_pmu_event_attr) {\
+	HNS3_PMU_EVT_PPS_##_name##_INTR_NUM,				\
+	HNS3_PMU_FILTER_INTR_##_name})
+#define HNS3_INTR_EVT_TIME(_name)	(&(struct hns3_pmu_event_attr) {\
+	HNS3_PMU_EVT_PPS_##_name##_TIME,				\
+	HNS3_PMU_FILTER_INTR_##_name})
+
+static ssize_t hns3_pmu_format_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t hns3_pmu_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct hns3_pmu_event_attr *event;
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	event = eattr->var;
+
+	return sysfs_emit(buf, "config=0x%x\n", event->event);
+}
+
+static ssize_t hns3_pmu_filter_mode_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct hns3_pmu_event_attr *event;
+	struct dev_ext_attribute *eattr;
+	int len;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	event = eattr->var;
+
+	len = sysfs_emit_at(buf, 0, "filter mode supported: ");
+	if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL)
+		len += sysfs_emit_at(buf, len, "global ");
+	if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT)
+		len += sysfs_emit_at(buf, len, "port ");
+	if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC)
+		len += sysfs_emit_at(buf, len, "port-tc ");
+	if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC)
+		len += sysfs_emit_at(buf, len, "func ");
+	if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE)
+		len += sysfs_emit_at(buf, len, "func-queue ");
+	if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR)
+		len += sysfs_emit_at(buf, len, "func-intr ");
+
+	len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+#define HNS3_PMU_ATTR(_name, _func, _config)				\
+	(&((struct dev_ext_attribute[]) {				\
+		{ __ATTR(_name, 0444, _func, NULL), (void *)_config }	\
+	})[0].attr.attr)
+
+#define HNS3_PMU_FORMAT_ATTR(_name, _format) \
+	HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format)
+#define HNS3_PMU_EVENT_ATTR(_name, _event) \
+	HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event)
+#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \
+	HNS3_PMU_ATTR(_name, hns3_pmu_filter_mode_show, (void *)_event)
+
+#define HNS3_PMU_BW_EVT_PAIR(_name, _macro) \
+	HNS3_PMU_EVENT_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \
+	HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro))
+#define HNS3_PMU_PPS_EVT_PAIR(_name, _macro) \
+	HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \
+	HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro))
+#define HNS3_PMU_DLY_EVT_PAIR(_name, _macro) \
+	HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \
+	HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro))
+#define HNS3_PMU_INTR_EVT_PAIR(_name, _macro) \
+	HNS3_PMU_EVENT_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \
+	HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro))
+
+#define HNS3_PMU_BW_FLT_MODE_PAIR(_name, _macro) \
+	HNS3_PMU_FLT_MODE_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \
+	HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro))
+#define HNS3_PMU_PPS_FLT_MODE_PAIR(_name, _macro) \
+	HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \
+	HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro))
+#define HNS3_PMU_DLY_FLT_MODE_PAIR(_name, _macro) \
+	HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \
+	HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro))
+#define HNS3_PMU_INTR_FLT_MODE_PAIR(_name, _macro) \
+	HNS3_PMU_FLT_MODE_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \
+	HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro))
+
+static u8 hns3_pmu_hw_filter_modes[] = {
+	HNS3_PMU_HW_FILTER_GLOBAL,
+	HNS3_PMU_HW_FILTER_PORT,
+	HNS3_PMU_HW_FILTER_PORT_TC,
+	HNS3_PMU_HW_FILTER_FUNC,
+	HNS3_PMU_HW_FILTER_FUNC_QUEUE,
+	HNS3_PMU_HW_FILTER_FUNC_INTR,
+};
+
+#define HNS3_PMU_SET_HW_FILTER(_hwc, _mode) \
+	((_hwc)->addr_filters = (void *)&hns3_pmu_hw_filter_modes[(_mode)])
+
+static ssize_t identifier_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "0x%x\n", hns3_pmu->identifier);
+}
+static DEVICE_ATTR_RO(identifier);
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%d\n", hns3_pmu->on_cpu);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+	u16 bdf = hns3_pmu->bdf_min;
+
+	return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf),
+			  PCI_SLOT(bdf), PCI_FUNC(bdf));
+}
+static DEVICE_ATTR_RO(bdf_min);
+
+static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+	u16 bdf = hns3_pmu->bdf_max;
+
+	return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf),
+			  PCI_SLOT(bdf), PCI_FUNC(bdf));
+}
+static DEVICE_ATTR_RO(bdf_max);
+
+static ssize_t hw_clk_freq_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%u\n", hns3_pmu->hw_clk_freq);
+}
+static DEVICE_ATTR_RO(hw_clk_freq);
+
+static struct attribute *hns3_pmu_events_attr[] = {
+	/* bandwidth events */
+	HNS3_PMU_BW_EVT_PAIR(bw_ssu_egu, SSU_EGU),
+	HNS3_PMU_BW_EVT_PAIR(bw_ssu_rpu, SSU_RPU),
+	HNS3_PMU_BW_EVT_PAIR(bw_ssu_roce, SSU_ROCE),
+	HNS3_PMU_BW_EVT_PAIR(bw_roce_ssu, ROCE_SSU),
+	HNS3_PMU_BW_EVT_PAIR(bw_tpu_ssu, TPU_SSU),
+	HNS3_PMU_BW_EVT_PAIR(bw_rpu_rcbrx, RPU_RCBRX),
+	HNS3_PMU_BW_EVT_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH),
+	HNS3_PMU_BW_EVT_PAIR(bw_wr_fbd, WR_FBD),
+	HNS3_PMU_BW_EVT_PAIR(bw_wr_ebd, WR_EBD),
+	HNS3_PMU_BW_EVT_PAIR(bw_rd_fbd, RD_FBD),
+	HNS3_PMU_BW_EVT_PAIR(bw_rd_ebd, RD_EBD),
+	HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m0, RD_PAY_M0),
+	HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m1, RD_PAY_M1),
+	HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m0, WR_PAY_M0),
+	HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m1, WR_PAY_M1),
+
+	/* packet rate events */
+	HNS3_PMU_PPS_EVT_PAIR(pps_igu_ssu, IGU_SSU),
+	HNS3_PMU_PPS_EVT_PAIR(pps_ssu_egu, SSU_EGU),
+	HNS3_PMU_PPS_EVT_PAIR(pps_ssu_rpu, SSU_RPU),
+	HNS3_PMU_PPS_EVT_PAIR(pps_ssu_roce, SSU_ROCE),
+	HNS3_PMU_PPS_EVT_PAIR(pps_roce_ssu, ROCE_SSU),
+	HNS3_PMU_PPS_EVT_PAIR(pps_tpu_ssu, TPU_SSU),
+	HNS3_PMU_PPS_EVT_PAIR(pps_rpu_rcbrx, RPU_RCBRX),
+	HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_tpu, RCBTX_TPU),
+	HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH),
+	HNS3_PMU_PPS_EVT_PAIR(pps_wr_fbd, WR_FBD),
+	HNS3_PMU_PPS_EVT_PAIR(pps_wr_ebd, WR_EBD),
+	HNS3_PMU_PPS_EVT_PAIR(pps_rd_fbd, RD_FBD),
+	HNS3_PMU_PPS_EVT_PAIR(pps_rd_ebd, RD_EBD),
+	HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m0, RD_PAY_M0),
+	HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m1, RD_PAY_M1),
+	HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m0, WR_PAY_M0),
+	HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m1, WR_PAY_M1),
+	HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE),
+	HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE),
+
+	/* latency events */
+	HNS3_PMU_DLY_EVT_PAIR(dly_tx_push_to_mac, TX_PUSH),
+	HNS3_PMU_DLY_EVT_PAIR(dly_tx_normal_to_mac, TX),
+	HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC),
+	HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE),
+	HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC),
+	HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE),
+	HNS3_PMU_DLY_EVT_PAIR(dly_rpu, RPU),
+	HNS3_PMU_DLY_EVT_PAIR(dly_tpu, TPU),
+	HNS3_PMU_DLY_EVT_PAIR(dly_rpe, RPE),
+	HNS3_PMU_DLY_EVT_PAIR(dly_tpe_normal, TPE),
+	HNS3_PMU_DLY_EVT_PAIR(dly_tpe_push, TPE_PUSH),
+	HNS3_PMU_DLY_EVT_PAIR(dly_wr_fbd, WR_FBD),
+	HNS3_PMU_DLY_EVT_PAIR(dly_wr_ebd, WR_EBD),
+	HNS3_PMU_DLY_EVT_PAIR(dly_rd_fbd, RD_FBD),
+	HNS3_PMU_DLY_EVT_PAIR(dly_rd_ebd, RD_EBD),
+	HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m0, RD_PAY_M0),
+	HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m1, RD_PAY_M1),
+	HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m0, WR_PAY_M0),
+	HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m1, WR_PAY_M1),
+	HNS3_PMU_DLY_EVT_PAIR(dly_msix_write, MSIX_WRITE),
+
+	/* interrupt rate events */
+	HNS3_PMU_INTR_EVT_PAIR(pps_intr_msix_nic, MSIX_NIC),
+
+	NULL
+};
+
+static struct attribute *hns3_pmu_filter_mode_attr[] = {
+	/* bandwidth events */
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_egu, SSU_EGU),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_rpu, SSU_RPU),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_roce, SSU_ROCE),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_roce_ssu, ROCE_SSU),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_tpu_ssu, TPU_SSU),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_rpu_rcbrx, RPU_RCBRX),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_fbd, WR_FBD),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_ebd, WR_EBD),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_fbd, RD_FBD),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_ebd, RD_EBD),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m0, RD_PAY_M0),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m1, RD_PAY_M1),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m0, WR_PAY_M0),
+	HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m1, WR_PAY_M1),
+
+	/* packet rate events */
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_igu_ssu, IGU_SSU),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_egu, SSU_EGU),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_rpu, SSU_RPU),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_roce, SSU_ROCE),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_roce_ssu, ROCE_SSU),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_tpu_ssu, TPU_SSU),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rpu_rcbrx, RPU_RCBRX),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_tpu, RCBTX_TPU),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_fbd, WR_FBD),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_ebd, WR_EBD),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_fbd, RD_FBD),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_ebd, RD_EBD),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m0, RD_PAY_M0),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m1, RD_PAY_M1),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m0, WR_PAY_M0),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m1, WR_PAY_M1),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE),
+	HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE),
+
+	/* latency events */
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_push_to_mac, TX_PUSH),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_normal_to_mac, TX),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpu, RPU),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpu, TPU),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpe, RPE),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_normal, TPE),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_push, TPE_PUSH),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_fbd, WR_FBD),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_ebd, WR_EBD),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_fbd, RD_FBD),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_ebd, RD_EBD),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m0, RD_PAY_M0),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m1, RD_PAY_M1),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m0, WR_PAY_M0),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m1, WR_PAY_M1),
+	HNS3_PMU_DLY_FLT_MODE_PAIR(dly_msix_write, MSIX_WRITE),
+
+	/* interrupt rate events */
+	HNS3_PMU_INTR_FLT_MODE_PAIR(pps_intr_msix_nic, MSIX_NIC),
+
+	NULL
+};
+
+static struct attribute_group hns3_pmu_events_group = {
+	.name = "events",
+	.attrs = hns3_pmu_events_attr,
+};
+
+static struct attribute_group hns3_pmu_filter_mode_group = {
+	.name = "filtermode",
+	.attrs = hns3_pmu_filter_mode_attr,
+};
+
+static struct attribute *hns3_pmu_format_attr[] = {
+	HNS3_PMU_FORMAT_ATTR(subevent, "config:0-7"),
+	HNS3_PMU_FORMAT_ATTR(event_type, "config:8-15"),
+	HNS3_PMU_FORMAT_ATTR(ext_counter_used, "config:16"),
+	HNS3_PMU_FORMAT_ATTR(port, "config1:0-3"),
+	HNS3_PMU_FORMAT_ATTR(tc, "config1:4-7"),
+	HNS3_PMU_FORMAT_ATTR(bdf, "config1:8-23"),
+	HNS3_PMU_FORMAT_ATTR(queue, "config1:24-39"),
+	HNS3_PMU_FORMAT_ATTR(intr, "config1:40-51"),
+	HNS3_PMU_FORMAT_ATTR(global, "config1:52"),
+	NULL
+};
+
+static struct attribute_group hns3_pmu_format_group = {
+	.name = "format",
+	.attrs = hns3_pmu_format_attr,
+};
+
+static struct attribute *hns3_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static struct attribute_group hns3_pmu_cpumask_attr_group = {
+	.attrs = hns3_pmu_cpumask_attrs,
+};
+
+static struct attribute *hns3_pmu_identifier_attrs[] = {
+	&dev_attr_identifier.attr,
+	NULL
+};
+
+static struct attribute_group hns3_pmu_identifier_attr_group = {
+	.attrs = hns3_pmu_identifier_attrs,
+};
+
+static struct attribute *hns3_pmu_bdf_range_attrs[] = {
+	&dev_attr_bdf_min.attr,
+	&dev_attr_bdf_max.attr,
+	NULL
+};
+
+static struct attribute_group hns3_pmu_bdf_range_attr_group = {
+	.attrs = hns3_pmu_bdf_range_attrs,
+};
+
+static struct attribute *hns3_pmu_hw_clk_freq_attrs[] = {
+	&dev_attr_hw_clk_freq.attr,
+	NULL
+};
+
+static struct attribute_group hns3_pmu_hw_clk_freq_attr_group = {
+	.attrs = hns3_pmu_hw_clk_freq_attrs,
+};
+
+static const struct attribute_group *hns3_pmu_attr_groups[] = {
+	&hns3_pmu_events_group,
+	&hns3_pmu_filter_mode_group,
+	&hns3_pmu_format_group,
+	&hns3_pmu_cpumask_attr_group,
+	&hns3_pmu_identifier_attr_group,
+	&hns3_pmu_bdf_range_attr_group,
+	&hns3_pmu_hw_clk_freq_attr_group,
+	NULL
+};
+
+static u32 hns3_pmu_get_event(struct perf_event *event)
+{
+	return hns3_pmu_get_ext_counter_used(event) << 16 |
+	       hns3_pmu_get_event_type(event) << 8 |
+	       hns3_pmu_get_subevent(event);
+}
+
+static u32 hns3_pmu_get_real_event(struct perf_event *event)
+{
+	return hns3_pmu_get_event_type(event) << 8 |
+	       hns3_pmu_get_subevent(event);
+}
+
+static u32 hns3_pmu_get_offset(u32 offset, u32 idx)
+{
+	return offset + HNS3_PMU_REG_EVENT_OFFSET +
+	       HNS3_PMU_REG_EVENT_SIZE * idx;
+}
+
+static u32 hns3_pmu_readl(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx)
+{
+	u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+	return readl(hns3_pmu->base + offset);
+}
+
+static void hns3_pmu_writel(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx,
+			    u32 val)
+{
+	u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+	writel(val, hns3_pmu->base + offset);
+}
+
+static u64 hns3_pmu_readq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx)
+{
+	u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+	return readq(hns3_pmu->base + offset);
+}
+
+static void hns3_pmu_writeq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx,
+			    u64 val)
+{
+	u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+	writeq(val, hns3_pmu->base + offset);
+}
+
+static bool hns3_pmu_cmp_event(struct perf_event *target,
+			       struct perf_event *event)
+{
+	return hns3_pmu_get_real_event(target) == hns3_pmu_get_real_event(event);
+}
+
+static int hns3_pmu_find_related_event_idx(struct hns3_pmu *hns3_pmu,
+					   struct perf_event *event)
+{
+	struct perf_event *sibling;
+	int hw_event_used = 0;
+	int idx;
+
+	for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+		sibling = hns3_pmu->hw_events[idx];
+		if (!sibling)
+			continue;
+
+		hw_event_used++;
+
+		if (!hns3_pmu_cmp_event(sibling, event))
+			continue;
+
+		/* Related events is used in group */
+		if (sibling->group_leader == event->group_leader)
+			return idx;
+	}
+
+	/* No related event and all hardware events are used up */
+	if (hw_event_used >= HNS3_PMU_MAX_HW_EVENTS)
+		return -EBUSY;
+
+	/* No related event and there is extra hardware events can be use */
+	return -ENOENT;
+}
+
+static int hns3_pmu_get_event_idx(struct hns3_pmu *hns3_pmu)
+{
+	int idx;
+
+	for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+		if (!hns3_pmu->hw_events[idx])
+			return idx;
+	}
+
+	return -EBUSY;
+}
+
+static bool hns3_pmu_valid_bdf(struct hns3_pmu *hns3_pmu, u16 bdf)
+{
+	struct pci_dev *pdev;
+
+	if (bdf < hns3_pmu->bdf_min || bdf > hns3_pmu->bdf_max) {
+		pci_err(hns3_pmu->pdev, "Invalid EP device: %#x!\n", bdf);
+		return false;
+	}
+
+	pdev = pci_get_domain_bus_and_slot(pci_domain_nr(hns3_pmu->pdev->bus),
+					   PCI_BUS_NUM(bdf),
+					   GET_PCI_DEVFN(bdf));
+	if (!pdev) {
+		pci_err(hns3_pmu->pdev, "Nonexistent EP device: %#x!\n", bdf);
+		return false;
+	}
+
+	pci_dev_put(pdev);
+	return true;
+}
+
+static void hns3_pmu_set_qid_para(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf,
+				  u16 queue)
+{
+	u32 val;
+
+	val = GET_PCI_DEVFN(bdf);
+	val |= (u32)queue << HNS3_PMU_QID_PARA_QUEUE_S;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_PARA, idx, val);
+}
+
+static bool hns3_pmu_qid_req_start(struct hns3_pmu *hns3_pmu, u32 idx)
+{
+	bool queue_id_valid = false;
+	u32 reg_qid_ctrl, val;
+	int err;
+
+	/* enable queue id request */
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx,
+			HNS3_PMU_QID_CTRL_REQ_ENABLE);
+
+	reg_qid_ctrl = hns3_pmu_get_offset(HNS3_PMU_REG_EVENT_QID_CTRL, idx);
+	err = readl_poll_timeout(hns3_pmu->base + reg_qid_ctrl, val,
+				 val & HNS3_PMU_QID_CTRL_DONE, 1, 1000);
+	if (err == -ETIMEDOUT) {
+		pci_err(hns3_pmu->pdev, "QID request timeout!\n");
+		goto out;
+	}
+
+	queue_id_valid = !(val & HNS3_PMU_QID_CTRL_MISS);
+
+out:
+	/* disable qid request and clear status */
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, 0);
+
+	return queue_id_valid;
+}
+
+static bool hns3_pmu_valid_queue(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf,
+				 u16 queue)
+{
+	hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue);
+
+	return hns3_pmu_qid_req_start(hns3_pmu, idx);
+}
+
+static struct hns3_pmu_event_attr *hns3_pmu_get_pmu_event(u32 event)
+{
+	struct hns3_pmu_event_attr *pmu_event;
+	struct dev_ext_attribute *eattr;
+	struct device_attribute *dattr;
+	struct attribute *attr;
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(hns3_pmu_events_attr) - 1; i++) {
+		attr = hns3_pmu_events_attr[i];
+		dattr = container_of(attr, struct device_attribute, attr);
+		eattr = container_of(dattr, struct dev_ext_attribute, attr);
+		pmu_event = eattr->var;
+
+		if (event == pmu_event->event)
+			return pmu_event;
+	}
+
+	return NULL;
+}
+
+static int hns3_pmu_set_func_mode(struct perf_event *event,
+				  struct hns3_pmu *hns3_pmu)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u16 bdf = hns3_pmu_get_bdf(event);
+
+	if (!hns3_pmu_valid_bdf(hns3_pmu, bdf))
+		return -ENOENT;
+
+	HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC);
+
+	return 0;
+}
+
+static int hns3_pmu_set_func_queue_mode(struct perf_event *event,
+					struct hns3_pmu *hns3_pmu)
+{
+	u16 queue_id = hns3_pmu_get_queue(event);
+	struct hw_perf_event *hwc = &event->hw;
+	u16 bdf = hns3_pmu_get_bdf(event);
+
+	if (!hns3_pmu_valid_bdf(hns3_pmu, bdf))
+		return -ENOENT;
+
+	if (!hns3_pmu_valid_queue(hns3_pmu, hwc->idx, bdf, queue_id)) {
+		pci_err(hns3_pmu->pdev, "Invalid queue: %u\n", queue_id);
+		return -ENOENT;
+	}
+
+	HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_QUEUE);
+
+	return 0;
+}
+
+static bool
+hns3_pmu_is_enabled_global_mode(struct perf_event *event,
+				struct hns3_pmu_event_attr *pmu_event)
+{
+	u8 global = hns3_pmu_get_global(event);
+
+	if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL))
+		return false;
+
+	return global;
+}
+
+static bool hns3_pmu_is_enabled_func_mode(struct perf_event *event,
+					  struct hns3_pmu_event_attr *pmu_event)
+{
+	u16 queue_id = hns3_pmu_get_queue(event);
+	u16 bdf = hns3_pmu_get_bdf(event);
+
+	if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC))
+		return false;
+	else if (queue_id != HNS3_PMU_FILTER_ALL_QUEUE)
+		return false;
+
+	return bdf;
+}
+
+static bool
+hns3_pmu_is_enabled_func_queue_mode(struct perf_event *event,
+				    struct hns3_pmu_event_attr *pmu_event)
+{
+	u16 queue_id = hns3_pmu_get_queue(event);
+	u16 bdf = hns3_pmu_get_bdf(event);
+
+	if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE))
+		return false;
+	else if (queue_id == HNS3_PMU_FILTER_ALL_QUEUE)
+		return false;
+
+	return bdf;
+}
+
+static bool hns3_pmu_is_enabled_port_mode(struct perf_event *event,
+					  struct hns3_pmu_event_attr *pmu_event)
+{
+	u8 tc_id = hns3_pmu_get_tc(event);
+
+	if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT))
+		return false;
+
+	return tc_id == HNS3_PMU_FILTER_ALL_TC;
+}
+
+static bool
+hns3_pmu_is_enabled_port_tc_mode(struct perf_event *event,
+				 struct hns3_pmu_event_attr *pmu_event)
+{
+	u8 tc_id = hns3_pmu_get_tc(event);
+
+	if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC))
+		return false;
+
+	return tc_id != HNS3_PMU_FILTER_ALL_TC;
+}
+
+static bool
+hns3_pmu_is_enabled_func_intr_mode(struct perf_event *event,
+				   struct hns3_pmu *hns3_pmu,
+				   struct hns3_pmu_event_attr *pmu_event)
+{
+	u16 bdf = hns3_pmu_get_bdf(event);
+
+	if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR))
+		return false;
+
+	return hns3_pmu_valid_bdf(hns3_pmu, bdf);
+}
+
+static int hns3_pmu_select_filter_mode(struct perf_event *event,
+				       struct hns3_pmu *hns3_pmu)
+{
+	u32 event_id = hns3_pmu_get_event(event);
+	struct hw_perf_event *hwc = &event->hw;
+	struct hns3_pmu_event_attr *pmu_event;
+
+	pmu_event = hns3_pmu_get_pmu_event(event_id);
+	if (!pmu_event) {
+		pci_err(hns3_pmu->pdev, "Invalid pmu event\n");
+		return -ENOENT;
+	}
+
+	if (hns3_pmu_is_enabled_global_mode(event, pmu_event)) {
+		HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_GLOBAL);
+		return 0;
+	}
+
+	if (hns3_pmu_is_enabled_func_mode(event, pmu_event))
+		return hns3_pmu_set_func_mode(event, hns3_pmu);
+
+	if (hns3_pmu_is_enabled_func_queue_mode(event, pmu_event))
+		return hns3_pmu_set_func_queue_mode(event, hns3_pmu);
+
+	if (hns3_pmu_is_enabled_port_mode(event, pmu_event)) {
+		HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT);
+		return 0;
+	}
+
+	if (hns3_pmu_is_enabled_port_tc_mode(event, pmu_event)) {
+		HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT_TC);
+		return 0;
+	}
+
+	if (hns3_pmu_is_enabled_func_intr_mode(event, hns3_pmu, pmu_event)) {
+		HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_INTR);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static bool hns3_pmu_validate_event_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct perf_event *event_group[HNS3_PMU_MAX_HW_EVENTS];
+	int counters = 1;
+	int num;
+
+	event_group[0] = leader;
+	if (!is_software_event(leader)) {
+		if (leader->pmu != event->pmu)
+			return false;
+
+		if (leader != event && !hns3_pmu_cmp_event(leader, event))
+			event_group[counters++] = event;
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (is_software_event(sibling))
+			continue;
+
+		if (sibling->pmu != event->pmu)
+			return false;
+
+		for (num = 0; num < counters; num++) {
+			if (hns3_pmu_cmp_event(event_group[num], sibling))
+				break;
+		}
+
+		if (num == counters)
+			event_group[counters++] = sibling;
+	}
+
+	return counters <= HNS3_PMU_MAX_HW_EVENTS;
+}
+
+static u32 hns3_pmu_get_filter_condition(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u16 intr_id = hns3_pmu_get_intr(event);
+	u8 port_id = hns3_pmu_get_port(event);
+	u16 bdf = hns3_pmu_get_bdf(event);
+	u8 tc_id = hns3_pmu_get_tc(event);
+	u8 filter_mode;
+
+	filter_mode = *(u8 *)hwc->addr_filters;
+	switch (filter_mode) {
+	case HNS3_PMU_HW_FILTER_PORT:
+		return FILTER_CONDITION_PORT(port_id);
+	case HNS3_PMU_HW_FILTER_PORT_TC:
+		return FILTER_CONDITION_PORT_TC(port_id, tc_id);
+	case HNS3_PMU_HW_FILTER_FUNC:
+	case HNS3_PMU_HW_FILTER_FUNC_QUEUE:
+		return GET_PCI_DEVFN(bdf);
+	case HNS3_PMU_HW_FILTER_FUNC_INTR:
+		return FILTER_CONDITION_FUNC_INTR(GET_PCI_DEVFN(bdf), intr_id);
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static void hns3_pmu_config_filter(struct perf_event *event)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+	u8 event_type = hns3_pmu_get_event_type(event);
+	u8 subevent_id = hns3_pmu_get_subevent(event);
+	u16 queue_id = hns3_pmu_get_queue(event);
+	struct hw_perf_event *hwc = &event->hw;
+	u8 filter_mode = *(u8 *)hwc->addr_filters;
+	u16 bdf = hns3_pmu_get_bdf(event);
+	u32 idx = hwc->idx;
+	u32 val;
+
+	val = event_type;
+	val |= subevent_id << HNS3_PMU_CTRL_SUBEVENT_S;
+	val |= filter_mode << HNS3_PMU_CTRL_FILTER_MODE_S;
+	val |= HNS3_PMU_EVENT_OVERFLOW_RESTART;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+
+	val = hns3_pmu_get_filter_condition(event);
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_HIGH, idx, val);
+
+	if (filter_mode == HNS3_PMU_HW_FILTER_FUNC_QUEUE)
+		hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue_id);
+}
+
+static void hns3_pmu_enable_counter(struct hns3_pmu *hns3_pmu,
+				    struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u32 val;
+
+	val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+	val |= HNS3_PMU_EVENT_EN;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static void hns3_pmu_disable_counter(struct hns3_pmu *hns3_pmu,
+				     struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u32 val;
+
+	val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+	val &= ~HNS3_PMU_EVENT_EN;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static void hns3_pmu_enable_intr(struct hns3_pmu *hns3_pmu,
+				 struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u32 val;
+
+	val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx);
+	val &= ~HNS3_PMU_INTR_MASK_OVERFLOW;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val);
+}
+
+static void hns3_pmu_disable_intr(struct hns3_pmu *hns3_pmu,
+				  struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u32 val;
+
+	val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx);
+	val |= HNS3_PMU_INTR_MASK_OVERFLOW;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val);
+}
+
+static void hns3_pmu_clear_intr_status(struct hns3_pmu *hns3_pmu, u32 idx)
+{
+	u32 val;
+
+	val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+	val |= HNS3_PMU_EVENT_STATUS_RESET;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+
+	val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+	val &= ~HNS3_PMU_EVENT_STATUS_RESET;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static u64 hns3_pmu_read_counter(struct perf_event *event)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+
+	return hns3_pmu_readq(hns3_pmu, event->hw.event_base, event->hw.idx);
+}
+
+static void hns3_pmu_write_counter(struct perf_event *event, u64 value)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+	u32 idx = event->hw.idx;
+
+	hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_COUNTER, idx, value);
+	hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_EXT_COUNTER, idx, value);
+}
+
+static void hns3_pmu_init_counter(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	local64_set(&hwc->prev_count, 0);
+	hns3_pmu_write_counter(event, 0);
+}
+
+static int hns3_pmu_event_init(struct perf_event *event)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int ret;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling is not supported */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	event->cpu = hns3_pmu->on_cpu;
+
+	idx = hns3_pmu_get_event_idx(hns3_pmu);
+	if (idx < 0) {
+		pci_err(hns3_pmu->pdev, "Up to %u events are supported!\n",
+			HNS3_PMU_MAX_HW_EVENTS);
+		return -EBUSY;
+	}
+
+	hwc->idx = idx;
+
+	ret = hns3_pmu_select_filter_mode(event, hns3_pmu);
+	if (ret) {
+		pci_err(hns3_pmu->pdev, "Invalid filter, ret = %d.\n", ret);
+		return ret;
+	}
+
+	if (!hns3_pmu_validate_event_group(event)) {
+		pci_err(hns3_pmu->pdev, "Invalid event group.\n");
+		return -EINVAL;
+	}
+
+	if (hns3_pmu_get_ext_counter_used(event))
+		hwc->event_base = HNS3_PMU_REG_EVENT_EXT_COUNTER;
+	else
+		hwc->event_base = HNS3_PMU_REG_EVENT_COUNTER;
+
+	return 0;
+}
+
+static void hns3_pmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 new_cnt, prev_cnt, delta;
+
+	do {
+		prev_cnt = local64_read(&hwc->prev_count);
+		new_cnt = hns3_pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) !=
+		 prev_cnt);
+
+	delta = new_cnt - prev_cnt;
+	local64_add(delta, &event->count);
+}
+
+static void hns3_pmu_start(struct perf_event *event, int flags)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	hns3_pmu_config_filter(event);
+	hns3_pmu_init_counter(event);
+	hns3_pmu_enable_intr(hns3_pmu, hwc);
+	hns3_pmu_enable_counter(hns3_pmu, hwc);
+
+	perf_event_update_userpage(event);
+}
+
+static void hns3_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hns3_pmu_disable_counter(hns3_pmu, hwc);
+	hns3_pmu_disable_intr(hns3_pmu, hwc);
+
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	/* Read hardware counter and update the perf counter statistics */
+	hns3_pmu_read(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int hns3_pmu_add(struct perf_event *event, int flags)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	/* Check all working events to find a related event. */
+	idx = hns3_pmu_find_related_event_idx(hns3_pmu, event);
+	if (idx < 0 && idx != -ENOENT)
+		return idx;
+
+	/* Current event shares an enabled hardware event with related event */
+	if (idx >= 0 && idx < HNS3_PMU_MAX_HW_EVENTS) {
+		hwc->idx = idx;
+		goto start_count;
+	}
+
+	idx = hns3_pmu_get_event_idx(hns3_pmu);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	hns3_pmu->hw_events[idx] = event;
+
+start_count:
+	if (flags & PERF_EF_START)
+		hns3_pmu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void hns3_pmu_del(struct perf_event *event, int flags)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hns3_pmu_stop(event, PERF_EF_UPDATE);
+	hns3_pmu->hw_events[hwc->idx] = NULL;
+	perf_event_update_userpage(event);
+}
+
+static void hns3_pmu_enable(struct pmu *pmu)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu);
+	u32 val;
+
+	val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+	val |= HNS3_PMU_GLOBAL_START;
+	writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+}
+
+static void hns3_pmu_disable(struct pmu *pmu)
+{
+	struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu);
+	u32 val;
+
+	val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+	val &= ~HNS3_PMU_GLOBAL_START;
+	writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+}
+
+static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
+{
+	u16 device_id;
+	char *name;
+	u32 val;
+
+	hns3_pmu->base = pcim_iomap_table(pdev)[BAR_2];
+	if (!hns3_pmu->base) {
+		pci_err(pdev, "ioremap failed\n");
+		return -ENOMEM;
+	}
+
+	hns3_pmu->hw_clk_freq = readl(hns3_pmu->base + HNS3_PMU_REG_CLOCK_FREQ);
+
+	val = readl(hns3_pmu->base + HNS3_PMU_REG_BDF);
+	hns3_pmu->bdf_min = val & 0xffff;
+	hns3_pmu->bdf_max = val >> 16;
+
+	val = readl(hns3_pmu->base + HNS3_PMU_REG_DEVICE_ID);
+	device_id = val & 0xffff;
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hns3_pmu_sicl_%u", device_id);
+	if (!name)
+		return -ENOMEM;
+
+	hns3_pmu->pdev = pdev;
+	hns3_pmu->on_cpu = -1;
+	hns3_pmu->identifier = readl(hns3_pmu->base + HNS3_PMU_REG_VERSION);
+	hns3_pmu->pmu = (struct pmu) {
+		.name		= name,
+		.module		= THIS_MODULE,
+		.event_init	= hns3_pmu_event_init,
+		.pmu_enable	= hns3_pmu_enable,
+		.pmu_disable	= hns3_pmu_disable,
+		.add		= hns3_pmu_add,
+		.del		= hns3_pmu_del,
+		.start		= hns3_pmu_start,
+		.stop		= hns3_pmu_stop,
+		.read		= hns3_pmu_read,
+		.task_ctx_nr	= perf_invalid_context,
+		.attr_groups	= hns3_pmu_attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	return 0;
+}
+
+static irqreturn_t hns3_pmu_irq(int irq, void *data)
+{
+	struct hns3_pmu *hns3_pmu = data;
+	u32 intr_status, idx;
+
+	for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+		intr_status = hns3_pmu_readl(hns3_pmu,
+					     HNS3_PMU_REG_EVENT_INTR_STATUS,
+					     idx);
+
+		/*
+		 * As each counter will restart from 0 when it is overflowed,
+		 * extra processing is no need, just clear interrupt status.
+		 */
+		if (intr_status)
+			hns3_pmu_clear_intr_status(hns3_pmu, idx);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int hns3_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct hns3_pmu *hns3_pmu;
+
+	hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node);
+	if (!hns3_pmu)
+		return -ENODEV;
+
+	if (hns3_pmu->on_cpu == -1) {
+		hns3_pmu->on_cpu = cpu;
+		irq_set_affinity(hns3_pmu->irq, cpumask_of(cpu));
+	}
+
+	return 0;
+}
+
+static int hns3_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct hns3_pmu *hns3_pmu;
+	unsigned int target;
+
+	hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node);
+	if (!hns3_pmu)
+		return -ENODEV;
+
+	/* Nothing to do if this CPU doesn't own the PMU */
+	if (hns3_pmu->on_cpu != cpu)
+		return 0;
+
+	/* Choose a new CPU from all online cpus */
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&hns3_pmu->pmu, cpu, target);
+	hns3_pmu->on_cpu = target;
+	irq_set_affinity(hns3_pmu->irq, cpumask_of(target));
+
+	return 0;
+}
+
+static void hns3_pmu_free_irq(void *data)
+{
+	struct pci_dev *pdev = data;
+
+	pci_free_irq_vectors(pdev);
+}
+
+static int hns3_pmu_irq_register(struct pci_dev *pdev,
+				 struct hns3_pmu *hns3_pmu)
+{
+	int irq, ret;
+
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+	if (ret < 0) {
+		pci_err(pdev, "failed to enable MSI vectors, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev);
+	if (ret) {
+		pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret);
+		return ret;
+	}
+
+	irq = pci_irq_vector(pdev, 0);
+	ret = devm_request_irq(&pdev->dev, irq, hns3_pmu_irq, 0,
+			       hns3_pmu->pmu.name, hns3_pmu);
+	if (ret) {
+		pci_err(pdev, "failed to register irq, ret = %d.\n", ret);
+		return ret;
+	}
+
+	hns3_pmu->irq = irq;
+
+	return 0;
+}
+
+static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
+{
+	int ret;
+
+	ret = hns3_pmu_alloc_pmu(pdev, hns3_pmu);
+	if (ret)
+		return ret;
+
+	ret = hns3_pmu_irq_register(pdev, hns3_pmu);
+	if (ret)
+		return ret;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+				       &hns3_pmu->node);
+	if (ret) {
+		pci_err(pdev, "failed to register hotplug, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1);
+	if (ret) {
+		pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret);
+		cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+						    &hns3_pmu->node);
+	}
+
+	return ret;
+}
+
+static void hns3_pmu_uninit_pmu(struct pci_dev *pdev)
+{
+	struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev);
+
+	perf_pmu_unregister(&hns3_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+					    &hns3_pmu->node);
+}
+
+static int hns3_pmu_init_dev(struct pci_dev *pdev)
+{
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		pci_err(pdev, "failed to enable pci device, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(BAR_2), "hns3_pmu");
+	if (ret < 0) {
+		pci_err(pdev, "failed to request pci region, ret = %d.\n", ret);
+		return ret;
+	}
+
+	pci_set_master(pdev);
+
+	return 0;
+}
+
+static int hns3_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct hns3_pmu *hns3_pmu;
+	int ret;
+
+	hns3_pmu = devm_kzalloc(&pdev->dev, sizeof(*hns3_pmu), GFP_KERNEL);
+	if (!hns3_pmu)
+		return -ENOMEM;
+
+	ret = hns3_pmu_init_dev(pdev);
+	if (ret)
+		return ret;
+
+	ret = hns3_pmu_init_pmu(pdev, hns3_pmu);
+	if (ret) {
+		pci_clear_master(pdev);
+		return ret;
+	}
+
+	pci_set_drvdata(pdev, hns3_pmu);
+
+	return ret;
+}
+
+static void hns3_pmu_remove(struct pci_dev *pdev)
+{
+	hns3_pmu_uninit_pmu(pdev);
+	pci_clear_master(pdev);
+	pci_set_drvdata(pdev, NULL);
+}
+
+static const struct pci_device_id hns3_pmu_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa22b) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, hns3_pmu_ids);
+
+static struct pci_driver hns3_pmu_driver = {
+	.name = "hns3_pmu",
+	.id_table = hns3_pmu_ids,
+	.probe = hns3_pmu_probe,
+	.remove = hns3_pmu_remove,
+};
+
+static int __init hns3_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+				      "AP_PERF_ARM_HNS3_PMU_ONLINE",
+				      hns3_pmu_online_cpu,
+				      hns3_pmu_offline_cpu);
+	if (ret) {
+		pr_err("failed to setup HNS3 PMU hotplug, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = pci_register_driver(&hns3_pmu_driver);
+	if (ret) {
+		pr_err("failed to register pci driver, ret = %d.\n", ret);
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE);
+	}
+
+	return ret;
+}
+module_init(hns3_pmu_module_init);
+
+static void __exit hns3_pmu_module_exit(void)
+{
+	pci_unregister_driver(&hns3_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE);
+}
+module_exit(hns3_pmu_module_exit);
+
+MODULE_DESCRIPTION("HNS3 PMU driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
new file mode 100644
index 0000000000..524ba82bfc
--- /dev/null
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/hrtimer.h>
+#include <linux/acpi.h>
+#include <linux/platform_device.h>
+
+/* Performance Counters Operating Mode Control Registers */
+#define DDRC_PERF_CNT_OP_MODE_CTRL	0x8020
+#define OP_MODE_CTRL_VAL_MANNUAL	0x1
+
+/* Performance Counters Start Operation Control Registers */
+#define DDRC_PERF_CNT_START_OP_CTRL	0x8028
+#define START_OP_CTRL_VAL_START		0x1ULL
+#define START_OP_CTRL_VAL_ACTIVE	0x2
+
+/* Performance Counters End Operation Control Registers */
+#define DDRC_PERF_CNT_END_OP_CTRL	0x8030
+#define END_OP_CTRL_VAL_END		0x1ULL
+
+/* Performance Counters End Status Registers */
+#define DDRC_PERF_CNT_END_STATUS		0x8038
+#define END_STATUS_VAL_END_TIMER_MODE_END	0x1
+
+/* Performance Counters Configuration Registers */
+#define DDRC_PERF_CFG_BASE		0x8040
+
+/* 8 Generic event counter + 2 fixed event counters */
+#define DDRC_PERF_NUM_GEN_COUNTERS	8
+#define DDRC_PERF_NUM_FIX_COUNTERS	2
+#define DDRC_PERF_READ_COUNTER_IDX	DDRC_PERF_NUM_GEN_COUNTERS
+#define DDRC_PERF_WRITE_COUNTER_IDX	(DDRC_PERF_NUM_GEN_COUNTERS + 1)
+#define DDRC_PERF_NUM_COUNTERS		(DDRC_PERF_NUM_GEN_COUNTERS + \
+					 DDRC_PERF_NUM_FIX_COUNTERS)
+
+/* Generic event counter registers */
+#define DDRC_PERF_CFG(n)		(DDRC_PERF_CFG_BASE + 8 * (n))
+#define EVENT_ENABLE			BIT_ULL(63)
+
+/* Two dedicated event counters for DDR reads and writes */
+#define EVENT_DDR_READS			101
+#define EVENT_DDR_WRITES		100
+
+/*
+ * programmable events IDs in programmable event counters.
+ * DO NOT change these event-id numbers, they are used to
+ * program event bitmap in h/w.
+ */
+#define EVENT_OP_IS_ZQLATCH			55
+#define EVENT_OP_IS_ZQSTART			54
+#define EVENT_OP_IS_TCR_MRR			53
+#define EVENT_OP_IS_DQSOSC_MRR			52
+#define EVENT_OP_IS_DQSOSC_MPC			51
+#define EVENT_VISIBLE_WIN_LIMIT_REACHED_WR	50
+#define EVENT_VISIBLE_WIN_LIMIT_REACHED_RD	49
+#define EVENT_BSM_STARVATION			48
+#define EVENT_BSM_ALLOC				47
+#define EVENT_LPR_REQ_WITH_NOCREDIT		46
+#define EVENT_HPR_REQ_WITH_NOCREDIT		45
+#define EVENT_OP_IS_ZQCS			44
+#define EVENT_OP_IS_ZQCL			43
+#define EVENT_OP_IS_LOAD_MODE			42
+#define EVENT_OP_IS_SPEC_REF			41
+#define EVENT_OP_IS_CRIT_REF			40
+#define EVENT_OP_IS_REFRESH			39
+#define EVENT_OP_IS_ENTER_MPSM			35
+#define EVENT_OP_IS_ENTER_POWERDOWN		31
+#define EVENT_OP_IS_ENTER_SELFREF		27
+#define EVENT_WAW_HAZARD			26
+#define EVENT_RAW_HAZARD			25
+#define EVENT_WAR_HAZARD			24
+#define EVENT_WRITE_COMBINE			23
+#define EVENT_RDWR_TRANSITIONS			22
+#define EVENT_PRECHARGE_FOR_OTHER		21
+#define EVENT_PRECHARGE_FOR_RDWR		20
+#define EVENT_OP_IS_PRECHARGE			19
+#define EVENT_OP_IS_MWR				18
+#define EVENT_OP_IS_WR				17
+#define EVENT_OP_IS_RD				16
+#define EVENT_OP_IS_RD_ACTIVATE			15
+#define EVENT_OP_IS_RD_OR_WR			14
+#define EVENT_OP_IS_ACTIVATE			13
+#define EVENT_WR_XACT_WHEN_CRITICAL		12
+#define EVENT_LPR_XACT_WHEN_CRITICAL		11
+#define EVENT_HPR_XACT_WHEN_CRITICAL		10
+#define EVENT_DFI_RD_DATA_CYCLES		9
+#define EVENT_DFI_WR_DATA_CYCLES		8
+#define EVENT_ACT_BYPASS			7
+#define EVENT_READ_BYPASS			6
+#define EVENT_HIF_HI_PRI_RD			5
+#define EVENT_HIF_RMW				4
+#define EVENT_HIF_RD				3
+#define EVENT_HIF_WR				2
+#define EVENT_HIF_RD_OR_WR			1
+
+/* Event counter value registers */
+#define DDRC_PERF_CNT_VALUE_BASE		0x8080
+#define DDRC_PERF_CNT_VALUE(n)	(DDRC_PERF_CNT_VALUE_BASE + 8 * (n))
+
+/* Fixed event counter enable/disable register */
+#define DDRC_PERF_CNT_FREERUN_EN	0x80C0
+#define DDRC_PERF_FREERUN_WRITE_EN	0x1
+#define DDRC_PERF_FREERUN_READ_EN	0x2
+
+/* Fixed event counter control register */
+#define DDRC_PERF_CNT_FREERUN_CTRL	0x80C8
+#define DDRC_FREERUN_WRITE_CNT_CLR	0x1
+#define DDRC_FREERUN_READ_CNT_CLR	0x2
+
+/* Fixed event counter value register */
+#define DDRC_PERF_CNT_VALUE_WR_OP	0x80D0
+#define DDRC_PERF_CNT_VALUE_RD_OP	0x80D8
+#define DDRC_PERF_CNT_VALUE_OVERFLOW	BIT_ULL(48)
+#define DDRC_PERF_CNT_MAX_VALUE		GENMASK_ULL(48, 0)
+
+struct cn10k_ddr_pmu {
+	struct pmu pmu;
+	void __iomem *base;
+	unsigned int cpu;
+	struct	device *dev;
+	int active_events;
+	struct perf_event *events[DDRC_PERF_NUM_COUNTERS];
+	struct hrtimer hrtimer;
+	struct hlist_node node;
+};
+
+#define to_cn10k_ddr_pmu(p)	container_of(p, struct cn10k_ddr_pmu, pmu)
+
+static ssize_t cn10k_ddr_pmu_event_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+
+}
+
+#define CN10K_DDR_PMU_EVENT_ATTR(_name, _id)				     \
+	PMU_EVENT_ATTR_ID(_name, cn10k_ddr_pmu_event_show, _id)
+
+static struct attribute *cn10k_ddr_perf_events_attrs[] = {
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_wr_data_access, EVENT_DFI_WR_DATA_CYCLES),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_rd_data_access, EVENT_DFI_RD_DATA_CYCLES),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access,
+					EVENT_HPR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access,
+					EVENT_LPR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access,
+					EVENT_WR_XACT_WHEN_CRITICAL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access, EVENT_OP_IS_RD_OR_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access, EVENT_OP_IS_RD_ACTIVATE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr, EVENT_PRECHARGE_FOR_RDWR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other,
+					EVENT_PRECHARGE_FOR_OTHER),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown, EVENT_OP_IS_ENTER_POWERDOWN),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_hpr_req_with_nocredit,
+					EVENT_HPR_REQ_WITH_NOCREDIT),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_lpr_req_with_nocredit,
+					EVENT_LPR_REQ_WITH_NOCREDIT),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd,
+					EVENT_VISIBLE_WIN_LIMIT_REACHED_RD),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr,
+					EVENT_VISIBLE_WIN_LIMIT_REACHED_WR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH),
+	/* Free run event counters */
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS),
+	CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES),
+	NULL
+};
+
+static struct attribute_group cn10k_ddr_perf_events_attr_group = {
+	.name = "events",
+	.attrs = cn10k_ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-8");
+
+static struct attribute *cn10k_ddr_perf_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cn10k_ddr_perf_format_attr_group = {
+	.name = "format",
+	.attrs = cn10k_ddr_perf_format_attrs,
+};
+
+static ssize_t cn10k_ddr_perf_cpumask_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct cn10k_ddr_pmu *pmu = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute cn10k_ddr_perf_cpumask_attr =
+	__ATTR(cpumask, 0444, cn10k_ddr_perf_cpumask_show, NULL);
+
+static struct attribute *cn10k_ddr_perf_cpumask_attrs[] = {
+	&cn10k_ddr_perf_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group cn10k_ddr_perf_cpumask_attr_group = {
+	.attrs = cn10k_ddr_perf_cpumask_attrs,
+};
+
+static const struct attribute_group *cn10k_attr_groups[] = {
+	&cn10k_ddr_perf_events_attr_group,
+	&cn10k_ddr_perf_format_attr_group,
+	&cn10k_ddr_perf_cpumask_attr_group,
+	NULL,
+};
+
+/* Default poll timeout is 100 sec, which is very sufficient for
+ * 48 bit counter incremented max at 5.6 GT/s, which may take many
+ * hours to overflow.
+ */
+static unsigned long cn10k_ddr_pmu_poll_period_sec = 100;
+module_param_named(poll_period_sec, cn10k_ddr_pmu_poll_period_sec, ulong, 0644);
+
+static ktime_t cn10k_ddr_pmu_timer_period(void)
+{
+	return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC);
+}
+
+static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap)
+{
+	switch (eventid) {
+	case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD:
+	case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH:
+		*event_bitmap = (1ULL << (eventid - 1));
+		break;
+	case EVENT_OP_IS_ENTER_SELFREF:
+	case EVENT_OP_IS_ENTER_POWERDOWN:
+	case EVENT_OP_IS_ENTER_MPSM:
+		*event_bitmap = (0xFULL << (eventid - 1));
+		break;
+	default:
+		pr_err("%s Invalid eventid %d\n", __func__, eventid);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu,
+					struct perf_event *event)
+{
+	u8 config = event->attr.config;
+	int i;
+
+	/* DDR read free-run counter index */
+	if (config == EVENT_DDR_READS) {
+		pmu->events[DDRC_PERF_READ_COUNTER_IDX] = event;
+		return DDRC_PERF_READ_COUNTER_IDX;
+	}
+
+	/* DDR write free-run counter index */
+	if (config == EVENT_DDR_WRITES) {
+		pmu->events[DDRC_PERF_WRITE_COUNTER_IDX] = event;
+		return DDRC_PERF_WRITE_COUNTER_IDX;
+	}
+
+	/* Allocate DDR generic counters */
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL) {
+			pmu->events[i] = event;
+			return i;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static void cn10k_ddr_perf_free_counter(struct cn10k_ddr_pmu *pmu, int counter)
+{
+	pmu->events[counter] = NULL;
+}
+
+static int cn10k_ddr_perf_event_init(struct perf_event *event)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event)) {
+		dev_info(pmu->dev, "Sampling not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0) {
+		dev_warn(pmu->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*  We must NOT create groups containing mixed PMUs */
+	if (event->group_leader->pmu != event->pmu &&
+	    !is_software_event(event->group_leader))
+		return -EINVAL;
+
+	/* Set ownership of event to one CPU, same event can not be observed
+	 * on multiple cpus at same time.
+	 */
+	event->cpu = pmu->cpu;
+	hwc->idx = -1;
+	return 0;
+}
+
+static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu,
+					  int counter, bool enable)
+{
+	u32 reg;
+	u64 val;
+
+	if (counter > DDRC_PERF_NUM_COUNTERS) {
+		pr_err("Error: unsupported counter %d\n", counter);
+		return;
+	}
+
+	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
+		reg = DDRC_PERF_CFG(counter);
+		val = readq_relaxed(pmu->base + reg);
+
+		if (enable)
+			val |= EVENT_ENABLE;
+		else
+			val &= ~EVENT_ENABLE;
+
+		writeq_relaxed(val, pmu->base + reg);
+	} else {
+		val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+		if (enable) {
+			if (counter == DDRC_PERF_READ_COUNTER_IDX)
+				val |= DDRC_PERF_FREERUN_READ_EN;
+			else
+				val |= DDRC_PERF_FREERUN_WRITE_EN;
+		} else {
+			if (counter == DDRC_PERF_READ_COUNTER_IDX)
+				val &= ~DDRC_PERF_FREERUN_READ_EN;
+			else
+				val &= ~DDRC_PERF_FREERUN_WRITE_EN;
+		}
+		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN);
+	}
+}
+
+static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter)
+{
+	u64 val;
+
+	if (counter == DDRC_PERF_READ_COUNTER_IDX)
+		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP);
+
+	if (counter == DDRC_PERF_WRITE_COUNTER_IDX)
+		return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP);
+
+	val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter));
+	return val;
+}
+
+static void cn10k_ddr_perf_event_update(struct perf_event *event)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_count, new_count, mask;
+
+	do {
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+	} while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
+
+	mask = DDRC_PERF_CNT_MAX_VALUE;
+
+	local64_add((new_count - prev_count) & mask, &event->count);
+}
+
+static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	local64_set(&hwc->prev_count, 0);
+
+	cn10k_ddr_perf_counter_enable(pmu, counter, true);
+
+	hwc->state = 0;
+}
+
+static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u8 config = event->attr.config;
+	int counter, ret;
+	u32 reg_offset;
+	u64 val;
+
+	counter = cn10k_ddr_perf_alloc_counter(pmu, event);
+	if (counter < 0)
+		return -EAGAIN;
+
+	pmu->active_events++;
+	hwc->idx = counter;
+
+	if (pmu->active_events == 1)
+		hrtimer_start(&pmu->hrtimer, cn10k_ddr_pmu_timer_period(),
+			      HRTIMER_MODE_REL_PINNED);
+
+	if (counter < DDRC_PERF_NUM_GEN_COUNTERS) {
+		/* Generic counters, configure event id */
+		reg_offset = DDRC_PERF_CFG(counter);
+		ret = ddr_perf_get_event_bitmap(config, &val);
+		if (ret)
+			return ret;
+
+		writeq_relaxed(val, pmu->base + reg_offset);
+	} else {
+		/* fixed event counter, clear counter value */
+		if (counter == DDRC_PERF_READ_COUNTER_IDX)
+			val = DDRC_FREERUN_READ_CNT_CLR;
+		else
+			val = DDRC_FREERUN_WRITE_CNT_CLR;
+
+		writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL);
+	}
+
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		cn10k_ddr_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void cn10k_ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	cn10k_ddr_perf_counter_enable(pmu, counter, false);
+
+	if (flags & PERF_EF_UPDATE)
+		cn10k_ddr_perf_event_update(event);
+
+	hwc->state |= PERF_HES_STOPPED;
+}
+
+static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags)
+{
+	struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int counter = hwc->idx;
+
+	cn10k_ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+	cn10k_ddr_perf_free_counter(pmu, counter);
+	pmu->active_events--;
+	hwc->idx = -1;
+
+	/* Cancel timer when no events to capture */
+	if (pmu->active_events == 0)
+		hrtimer_cancel(&pmu->hrtimer);
+}
+
+static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+
+	writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base +
+		       DDRC_PERF_CNT_START_OP_CTRL);
+}
+
+static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu);
+
+	writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base +
+		       DDRC_PERF_CNT_END_OP_CTRL);
+}
+
+static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu)
+{
+	struct hw_perf_event *hwc;
+	int i;
+
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		cn10k_ddr_perf_event_update(pmu->events[i]);
+	}
+
+	/* Reset previous count as h/w counter are reset */
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		hwc = &pmu->events[i]->hw;
+		local64_set(&hwc->prev_count, 0);
+	}
+}
+
+static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu)
+{
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	u64 prev_count, new_count;
+	u64 value;
+	int i;
+
+	event = pmu->events[DDRC_PERF_READ_COUNTER_IDX];
+	if (event) {
+		hwc = &event->hw;
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+
+		/* Overflow condition is when new count less than
+		 * previous count
+		 */
+		if (new_count < prev_count)
+			cn10k_ddr_perf_event_update(event);
+	}
+
+	event = pmu->events[DDRC_PERF_WRITE_COUNTER_IDX];
+	if (event) {
+		hwc = &event->hw;
+		prev_count = local64_read(&hwc->prev_count);
+		new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx);
+
+		/* Overflow condition is when new count less than
+		 * previous count
+		 */
+		if (new_count < prev_count)
+			cn10k_ddr_perf_event_update(event);
+	}
+
+	for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) {
+		if (pmu->events[i] == NULL)
+			continue;
+
+		value = cn10k_ddr_perf_read_counter(pmu, i);
+		if (value == DDRC_PERF_CNT_MAX_VALUE) {
+			pr_info("Counter-(%d) reached max value\n", i);
+			cn10k_ddr_perf_event_update_all(pmu);
+			cn10k_ddr_perf_pmu_disable(&pmu->pmu);
+			cn10k_ddr_perf_pmu_enable(&pmu->pmu);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart cn10k_ddr_pmu_timer_handler(struct hrtimer *hrtimer)
+{
+	struct cn10k_ddr_pmu *pmu = container_of(hrtimer, struct cn10k_ddr_pmu,
+						 hrtimer);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cn10k_ddr_pmu_overflow_handler(pmu);
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, cn10k_ddr_pmu_timer_period());
+	return HRTIMER_RESTART;
+}
+
+static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cn10k_ddr_pmu *pmu = hlist_entry_safe(node, struct cn10k_ddr_pmu,
+						     node);
+	unsigned int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+	return 0;
+}
+
+static int cn10k_ddr_perf_probe(struct platform_device *pdev)
+{
+	struct cn10k_ddr_pmu *ddr_pmu;
+	struct resource *res;
+	void __iomem *base;
+	char *name;
+	int ret;
+
+	ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddr_pmu), GFP_KERNEL);
+	if (!ddr_pmu)
+		return -ENOMEM;
+
+	ddr_pmu->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ddr_pmu);
+
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	ddr_pmu->base = base;
+
+	/* Setup the PMU counter to work in manual mode */
+	writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base +
+		       DDRC_PERF_CNT_OP_MODE_CTRL);
+
+	ddr_pmu->pmu = (struct pmu) {
+		.module	      = THIS_MODULE,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+		.task_ctx_nr = perf_invalid_context,
+		.attr_groups = cn10k_attr_groups,
+		.event_init  = cn10k_ddr_perf_event_init,
+		.add	     = cn10k_ddr_perf_event_add,
+		.del	     = cn10k_ddr_perf_event_del,
+		.start	     = cn10k_ddr_perf_event_start,
+		.stop	     = cn10k_ddr_perf_event_stop,
+		.read	     = cn10k_ddr_perf_event_update,
+		.pmu_enable  = cn10k_ddr_perf_pmu_enable,
+		.pmu_disable = cn10k_ddr_perf_pmu_disable,
+	};
+
+	/* Choose this cpu to collect perf data */
+	ddr_pmu->cpu = raw_smp_processor_id();
+
+	name = devm_kasprintf(ddr_pmu->dev, GFP_KERNEL, "mrvl_ddr_pmu_%llx",
+			      res->start);
+	if (!name)
+		return -ENOMEM;
+
+	hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler;
+
+	cpuhp_state_add_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+
+	ret = perf_pmu_register(&ddr_pmu->pmu, name, -1);
+	if (ret)
+		goto error;
+
+	pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start);
+	return 0;
+error:
+	cpuhp_state_remove_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+	return ret;
+}
+
+static int cn10k_ddr_perf_remove(struct platform_device *pdev)
+{
+	struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev);
+
+	cpuhp_state_remove_instance_nocalls(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				&ddr_pmu->node);
+
+	perf_pmu_unregister(&ddr_pmu->pmu);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id cn10k_ddr_pmu_of_match[] = {
+	{ .compatible = "marvell,cn10k-ddr-pmu", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cn10k_ddr_pmu_acpi_match[] = {
+	{"MRVL000A", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, cn10k_ddr_pmu_acpi_match);
+#endif
+
+static struct platform_driver cn10k_ddr_pmu_driver = {
+	.driver	= {
+		.name   = "cn10k-ddr-pmu",
+		.of_match_table = of_match_ptr(cn10k_ddr_pmu_of_match),
+		.acpi_match_table  = ACPI_PTR(cn10k_ddr_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe		= cn10k_ddr_perf_probe,
+	.remove		= cn10k_ddr_perf_remove,
+};
+
+static int __init cn10k_ddr_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+				"perf/marvell/cn10k/ddr:online", NULL,
+				cn10k_ddr_pmu_offline_cpu);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&cn10k_ddr_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(
+				CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
+	return ret;
+}
+
+static void __exit cn10k_ddr_pmu_exit(void)
+{
+	platform_driver_unregister(&cn10k_ddr_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE);
+}
+
+module_init(cn10k_ddr_pmu_init);
+module_exit(cn10k_ddr_pmu_exit);
+
+MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
new file mode 100644
index 0000000000..fec8e82edb
--- /dev/null
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell CN10K LLC-TAD perf driver
+ *
+ * Copyright (C) 2021 Marvell
+ */
+
+#define pr_fmt(fmt) "tad_pmu: " fmt
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/cpuhotplug.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+
+#define TAD_PFC_OFFSET		0x800
+#define TAD_PFC(counter)	(TAD_PFC_OFFSET | (counter << 3))
+#define TAD_PRF_OFFSET		0x900
+#define TAD_PRF(counter)	(TAD_PRF_OFFSET | (counter << 3))
+#define TAD_PRF_CNTSEL_MASK	0xFF
+#define TAD_MAX_COUNTERS	8
+
+#define to_tad_pmu(p) (container_of(p, struct tad_pmu, pmu))
+
+struct tad_region {
+	void __iomem	*base;
+};
+
+struct tad_pmu {
+	struct pmu pmu;
+	struct tad_region *regions;
+	u32 region_cnt;
+	unsigned int cpu;
+	struct hlist_node node;
+	struct perf_event *events[TAD_MAX_COUNTERS];
+	DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS);
+};
+
+static int tad_pmu_cpuhp_state;
+
+static void tad_pmu_event_counter_read(struct perf_event *event)
+{
+	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 counter_idx = hwc->idx;
+	u64 prev, new;
+	int i;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		for (i = 0, new = 0; i < tad_pmu->region_cnt; i++)
+			new += readq(tad_pmu->regions[i].base +
+				     TAD_PFC(counter_idx));
+	} while (local64_cmpxchg(&hwc->prev_count, prev, new) != prev);
+
+	local64_add(new - prev, &event->count);
+}
+
+static void tad_pmu_event_counter_stop(struct perf_event *event, int flags)
+{
+	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 counter_idx = hwc->idx;
+	int i;
+
+	/* TAD()_PFC() stop counting on the write
+	 * which sets TAD()_PRF()[CNTSEL] == 0
+	 */
+	for (i = 0; i < tad_pmu->region_cnt; i++) {
+		writeq_relaxed(0, tad_pmu->regions[i].base +
+			       TAD_PRF(counter_idx));
+	}
+
+	tad_pmu_event_counter_read(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static void tad_pmu_event_counter_start(struct perf_event *event, int flags)
+{
+	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 event_idx = event->attr.config;
+	u32 counter_idx = hwc->idx;
+	u64 reg_val;
+	int i;
+
+	hwc->state = 0;
+
+	/* Typically TAD_PFC() are zeroed to start counting */
+	for (i = 0; i < tad_pmu->region_cnt; i++)
+		writeq_relaxed(0, tad_pmu->regions[i].base +
+			       TAD_PFC(counter_idx));
+
+	/* TAD()_PFC() start counting on the write
+	 * which sets TAD()_PRF()[CNTSEL] != 0
+	 */
+	for (i = 0; i < tad_pmu->region_cnt; i++) {
+		reg_val = event_idx & 0xFF;
+		writeq_relaxed(reg_val,	tad_pmu->regions[i].base +
+			       TAD_PRF(counter_idx));
+	}
+}
+
+static void tad_pmu_event_counter_del(struct perf_event *event, int flags)
+{
+	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	tad_pmu_event_counter_stop(event, flags | PERF_EF_UPDATE);
+	tad_pmu->events[idx] = NULL;
+	clear_bit(idx, tad_pmu->counters_map);
+}
+
+static int tad_pmu_event_counter_add(struct perf_event *event, int flags)
+{
+	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	/* Get a free counter for this event */
+	idx = find_first_zero_bit(tad_pmu->counters_map, TAD_MAX_COUNTERS);
+	if (idx == TAD_MAX_COUNTERS)
+		return -EAGAIN;
+
+	set_bit(idx, tad_pmu->counters_map);
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED;
+	tad_pmu->events[idx] = event;
+
+	if (flags & PERF_EF_START)
+		tad_pmu_event_counter_start(event, flags);
+
+	return 0;
+}
+
+static int tad_pmu_event_init(struct perf_event *event)
+{
+	struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!event->attr.disabled)
+		return -EINVAL;
+
+	if (event->state != PERF_EVENT_STATE_OFF)
+		return -EINVAL;
+
+	event->cpu = tad_pmu->cpu;
+	event->hw.idx = -1;
+	event->hw.config_base = event->attr.config;
+
+	return 0;
+}
+
+static ssize_t tad_pmu_event_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define TAD_PMU_EVENT_ATTR(name, config)			\
+	PMU_EVENT_ATTR_ID(name, tad_pmu_event_show, config)
+
+static struct attribute *tad_pmu_event_attrs[] = {
+	TAD_PMU_EVENT_ATTR(tad_none, 0x0),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_in_any, 0x1),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_in_mn, 0x2),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_any, 0x4),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_mn, 0x5),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_exlmn, 0x6),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_dss, 0x7),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_in_retry_dss, 0x8),
+	TAD_PMU_EVENT_ATTR(tad_dat_msh_in_any, 0x9),
+	TAD_PMU_EVENT_ATTR(tad_dat_msh_in_dss, 0xa),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_out_any, 0xb),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_rd, 0xc),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_out_dss_wr, 0xd),
+	TAD_PMU_EVENT_ATTR(tad_req_msh_out_evict, 0xe),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_any, 0xf),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_retry_exlmn, 0x10),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_retry_mn, 0x11),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_exlmn, 0x12),
+	TAD_PMU_EVENT_ATTR(tad_rsp_msh_out_mn, 0x13),
+	TAD_PMU_EVENT_ATTR(tad_snp_msh_out_any, 0x14),
+	TAD_PMU_EVENT_ATTR(tad_snp_msh_out_mn, 0x15),
+	TAD_PMU_EVENT_ATTR(tad_snp_msh_out_exlmn, 0x16),
+	TAD_PMU_EVENT_ATTR(tad_dat_msh_out_any, 0x17),
+	TAD_PMU_EVENT_ATTR(tad_dat_msh_out_fill, 0x18),
+	TAD_PMU_EVENT_ATTR(tad_dat_msh_out_dss, 0x19),
+	TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a),
+	TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b),
+	TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c),
+	TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d),
+	TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
+	TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
+	TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
+	TAD_PMU_EVENT_ATTR(tad_dat_rd, 0x21),
+	TAD_PMU_EVENT_ATTR(tad_dat_rd_byp, 0x22),
+	TAD_PMU_EVENT_ATTR(tad_ifb_occ, 0x23),
+	TAD_PMU_EVENT_ATTR(tad_req_occ, 0x24),
+	NULL
+};
+
+static const struct attribute_group tad_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = tad_pmu_event_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *tad_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL
+};
+
+static struct attribute_group tad_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = tad_pmu_format_attrs,
+};
+
+static ssize_t tad_pmu_cpumask_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct tad_pmu *tad_pmu = to_tad_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(tad_pmu->cpu));
+}
+
+static DEVICE_ATTR(cpumask, 0444, tad_pmu_cpumask_show, NULL);
+
+static struct attribute *tad_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static struct attribute_group tad_pmu_cpumask_attr_group = {
+	.attrs = tad_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *tad_pmu_attr_groups[] = {
+	&tad_pmu_events_attr_group,
+	&tad_pmu_format_attr_group,
+	&tad_pmu_cpumask_attr_group,
+	NULL
+};
+
+static int tad_pmu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct tad_region *regions;
+	struct tad_pmu *tad_pmu;
+	struct resource *res;
+	u32 tad_pmu_page_size;
+	u32 tad_page_size;
+	u32 tad_cnt;
+	int i, ret;
+	char *name;
+
+	tad_pmu = devm_kzalloc(&pdev->dev, sizeof(*tad_pmu), GFP_KERNEL);
+	if (!tad_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, tad_pmu);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Mem resource not found\n");
+		return -ENODEV;
+	}
+
+	ret = device_property_read_u32(dev, "marvell,tad-page-size",
+				       &tad_page_size);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't find tad-page-size property\n");
+		return ret;
+	}
+
+	ret = device_property_read_u32(dev, "marvell,tad-pmu-page-size",
+				       &tad_pmu_page_size);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't find tad-pmu-page-size property\n");
+		return ret;
+	}
+
+	ret = device_property_read_u32(dev, "marvell,tad-cnt", &tad_cnt);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't find tad-cnt property\n");
+		return ret;
+	}
+
+	regions = devm_kcalloc(&pdev->dev, tad_cnt,
+			       sizeof(*regions), GFP_KERNEL);
+	if (!regions)
+		return -ENOMEM;
+
+	/* ioremap the distributed TAD pmu regions */
+	for (i = 0; i < tad_cnt && res->start < res->end; i++) {
+		regions[i].base = devm_ioremap(&pdev->dev,
+					       res->start,
+					       tad_pmu_page_size);
+		if (!regions[i].base) {
+			dev_err(&pdev->dev, "TAD%d ioremap fail\n", i);
+			return -ENOMEM;
+		}
+		res->start += tad_page_size;
+	}
+
+	tad_pmu->regions = regions;
+	tad_pmu->region_cnt = tad_cnt;
+
+	tad_pmu->pmu = (struct pmu) {
+
+		.module		= THIS_MODULE,
+		.attr_groups	= tad_pmu_attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE |
+				  PERF_PMU_CAP_NO_INTERRUPT,
+		.task_ctx_nr	= perf_invalid_context,
+
+		.event_init	= tad_pmu_event_init,
+		.add		= tad_pmu_event_counter_add,
+		.del		= tad_pmu_event_counter_del,
+		.start		= tad_pmu_event_counter_start,
+		.stop		= tad_pmu_event_counter_stop,
+		.read		= tad_pmu_event_counter_read,
+	};
+
+	tad_pmu->cpu = raw_smp_processor_id();
+
+	/* Register pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(tad_pmu_cpuhp_state,
+					       &tad_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	name = "tad";
+	ret = perf_pmu_register(&tad_pmu->pmu, name, -1);
+	if (ret)
+		cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state,
+						    &tad_pmu->node);
+
+	return ret;
+}
+
+static int tad_pmu_remove(struct platform_device *pdev)
+{
+	struct tad_pmu *pmu = platform_get_drvdata(pdev);
+
+	cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state,
+						&pmu->node);
+	perf_pmu_unregister(&pmu->pmu);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id tad_pmu_of_match[] = {
+	{ .compatible = "marvell,cn10k-tad-pmu", },
+	{},
+};
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id tad_pmu_acpi_match[] = {
+	{"MRVL000B", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match);
+#endif
+
+static struct platform_driver tad_pmu_driver = {
+	.driver         = {
+		.name   = "cn10k_tad_pmu",
+		.of_match_table = of_match_ptr(tad_pmu_of_match),
+		.acpi_match_table = ACPI_PTR(tad_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe          = tad_pmu_probe,
+	.remove         = tad_pmu_remove,
+};
+
+static int tad_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct tad_pmu *pmu = hlist_entry_safe(node, struct tad_pmu, node);
+	unsigned int target;
+
+	if (cpu != pmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+	pmu->cpu = target;
+
+	return 0;
+}
+
+static int __init tad_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/cn10k/tadpmu:online",
+				      NULL,
+				      tad_pmu_offline_cpu);
+	if (ret < 0)
+		return ret;
+	tad_pmu_cpuhp_state = ret;
+	ret = platform_driver_register(&tad_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(tad_pmu_cpuhp_state);
+
+	return ret;
+}
+
+static void __exit tad_pmu_exit(void)
+{
+	platform_driver_unregister(&tad_pmu_driver);
+	cpuhp_remove_multi_state(tad_pmu_cpuhp_state);
+}
+
+module_init(tad_pmu_init);
+module_exit(tad_pmu_exit);
+
+MODULE_DESCRIPTION("Marvell CN10K LLC-TAD Perf driver");
+MODULE_AUTHOR("Bhaskara Budiredla <bbudiredla@marvell.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
new file mode 100644
index 0000000000..3f9a98c17a
--- /dev/null
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -0,0 +1,1002 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2015-2017 The Linux Foundation. All rights reserved.
+ */
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+#include <asm/local64.h>
+#include <asm/sysreg.h>
+#include <soc/qcom/kryo-l2-accessors.h>
+
+#define MAX_L2_CTRS             9
+
+#define L2PMCR_NUM_EV_SHIFT     11
+#define L2PMCR_NUM_EV_MASK      0x1F
+
+#define L2PMCR                  0x400
+#define L2PMCNTENCLR            0x403
+#define L2PMCNTENSET            0x404
+#define L2PMINTENCLR            0x405
+#define L2PMINTENSET            0x406
+#define L2PMOVSCLR              0x407
+#define L2PMOVSSET              0x408
+#define L2PMCCNTCR              0x409
+#define L2PMCCNTR               0x40A
+#define L2PMCCNTSR              0x40C
+#define L2PMRESR                0x410
+#define IA_L2PMXEVCNTCR_BASE    0x420
+#define IA_L2PMXEVCNTR_BASE     0x421
+#define IA_L2PMXEVFILTER_BASE   0x423
+#define IA_L2PMXEVTYPER_BASE    0x424
+
+#define IA_L2_REG_OFFSET        0x10
+
+#define L2PMXEVFILTER_SUFILTER_ALL      0x000E0000
+#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x00000004
+#define L2PMXEVFILTER_ORGFILTER_ALL     0x00000003
+
+#define L2EVTYPER_REG_SHIFT     3
+
+#define L2PMRESR_GROUP_BITS     8
+#define L2PMRESR_GROUP_MASK     GENMASK(7, 0)
+
+#define L2CYCLE_CTR_BIT         31
+#define L2CYCLE_CTR_RAW_CODE    0xFE
+
+#define L2PMCR_RESET_ALL        0x6
+#define L2PMCR_COUNTERS_ENABLE  0x1
+#define L2PMCR_COUNTERS_DISABLE 0x0
+
+#define L2PMRESR_EN             BIT_ULL(63)
+
+#define L2_EVT_MASK             0x00000FFF
+#define L2_EVT_CODE_MASK        0x00000FF0
+#define L2_EVT_GRP_MASK         0x0000000F
+#define L2_EVT_CODE_SHIFT       4
+#define L2_EVT_GRP_SHIFT        0
+
+#define L2_EVT_CODE(event)   (((event) & L2_EVT_CODE_MASK) >> L2_EVT_CODE_SHIFT)
+#define L2_EVT_GROUP(event)  (((event) & L2_EVT_GRP_MASK) >> L2_EVT_GRP_SHIFT)
+
+#define L2_EVT_GROUP_MAX        7
+
+#define L2_COUNTER_RELOAD       BIT_ULL(31)
+#define L2_CYCLE_COUNTER_RELOAD BIT_ULL(63)
+
+
+#define reg_idx(reg, i)         (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
+
+/*
+ * Events
+ */
+#define L2_EVENT_CYCLES                    0xfe
+#define L2_EVENT_DCACHE_OPS                0x400
+#define L2_EVENT_ICACHE_OPS                0x401
+#define L2_EVENT_TLBI                      0x402
+#define L2_EVENT_BARRIERS                  0x403
+#define L2_EVENT_TOTAL_READS               0x405
+#define L2_EVENT_TOTAL_WRITES              0x406
+#define L2_EVENT_TOTAL_REQUESTS            0x407
+#define L2_EVENT_LDREX                     0x420
+#define L2_EVENT_STREX                     0x421
+#define L2_EVENT_CLREX                     0x422
+
+
+
+struct cluster_pmu;
+
+/*
+ * Aggregate PMU. Implements the core pmu functions and manages
+ * the hardware PMUs.
+ */
+struct l2cache_pmu {
+	struct hlist_node node;
+	u32 num_pmus;
+	struct pmu pmu;
+	int num_counters;
+	cpumask_t cpumask;
+	struct platform_device *pdev;
+	struct cluster_pmu * __percpu *pmu_cluster;
+	struct list_head clusters;
+};
+
+/*
+ * The cache is made up of one or more clusters, each cluster has its own PMU.
+ * Each cluster is associated with one or more CPUs.
+ * This structure represents one of the hardware PMUs.
+ *
+ * Events can be envisioned as a 2-dimensional array. Each column represents
+ * a group of events. There are 8 groups. Only one entry from each
+ * group can be in use at a time.
+ *
+ * Events are specified as 0xCCG, where CC is 2 hex digits specifying
+ * the code (array row) and G specifies the group (column).
+ *
+ * In addition there is a cycle counter event specified by L2CYCLE_CTR_RAW_CODE
+ * which is outside the above scheme.
+ */
+struct cluster_pmu {
+	struct list_head next;
+	struct perf_event *events[MAX_L2_CTRS];
+	struct l2cache_pmu *l2cache_pmu;
+	DECLARE_BITMAP(used_counters, MAX_L2_CTRS);
+	DECLARE_BITMAP(used_groups, L2_EVT_GROUP_MAX + 1);
+	int irq;
+	int cluster_id;
+	/* The CPU that is used for collecting events on this cluster */
+	int on_cpu;
+	/* All the CPUs associated with this cluster */
+	cpumask_t cluster_cpus;
+	spinlock_t pmu_lock;
+};
+
+#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
+
+static u32 l2_cycle_ctr_idx;
+static u32 l2_counter_present_mask;
+
+static inline u32 idx_to_reg_bit(u32 idx)
+{
+	if (idx == l2_cycle_ctr_idx)
+		return BIT(L2CYCLE_CTR_BIT);
+
+	return BIT(idx);
+}
+
+static inline struct cluster_pmu *get_cluster_pmu(
+	struct l2cache_pmu *l2cache_pmu, int cpu)
+{
+	return *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu);
+}
+
+static void cluster_pmu_reset(void)
+{
+	/* Reset all counters */
+	kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+	kryo_l2_set_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
+	kryo_l2_set_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
+	kryo_l2_set_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
+}
+
+static inline void cluster_pmu_enable(void)
+{
+	kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
+}
+
+static inline void cluster_pmu_disable(void)
+{
+	kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
+}
+
+static inline void cluster_pmu_counter_set_value(u32 idx, u64 value)
+{
+	if (idx == l2_cycle_ctr_idx)
+		kryo_l2_set_indirect_reg(L2PMCCNTR, value);
+	else
+		kryo_l2_set_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
+}
+
+static inline u64 cluster_pmu_counter_get_value(u32 idx)
+{
+	u64 value;
+
+	if (idx == l2_cycle_ctr_idx)
+		value = kryo_l2_get_indirect_reg(L2PMCCNTR);
+	else
+		value = kryo_l2_get_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx));
+
+	return value;
+}
+
+static inline void cluster_pmu_counter_enable(u32 idx)
+{
+	kryo_l2_set_indirect_reg(L2PMCNTENSET, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_disable(u32 idx)
+{
+	kryo_l2_set_indirect_reg(L2PMCNTENCLR, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_enable_interrupt(u32 idx)
+{
+	kryo_l2_set_indirect_reg(L2PMINTENSET, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_disable_interrupt(u32 idx)
+{
+	kryo_l2_set_indirect_reg(L2PMINTENCLR, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_set_evccntcr(u32 val)
+{
+	kryo_l2_set_indirect_reg(L2PMCCNTCR, val);
+}
+
+static inline void cluster_pmu_set_evcntcr(u32 ctr, u32 val)
+{
+	kryo_l2_set_indirect_reg(reg_idx(IA_L2PMXEVCNTCR, ctr), val);
+}
+
+static inline void cluster_pmu_set_evtyper(u32 ctr, u32 val)
+{
+	kryo_l2_set_indirect_reg(reg_idx(IA_L2PMXEVTYPER, ctr), val);
+}
+
+static void cluster_pmu_set_resr(struct cluster_pmu *cluster,
+			       u32 event_group, u32 event_cc)
+{
+	u64 field;
+	u64 resr_val;
+	u32 shift;
+	unsigned long flags;
+
+	shift = L2PMRESR_GROUP_BITS * event_group;
+	field = ((u64)(event_cc & L2PMRESR_GROUP_MASK) << shift);
+
+	spin_lock_irqsave(&cluster->pmu_lock, flags);
+
+	resr_val = kryo_l2_get_indirect_reg(L2PMRESR);
+	resr_val &= ~(L2PMRESR_GROUP_MASK << shift);
+	resr_val |= field;
+	resr_val |= L2PMRESR_EN;
+	kryo_l2_set_indirect_reg(L2PMRESR, resr_val);
+
+	spin_unlock_irqrestore(&cluster->pmu_lock, flags);
+}
+
+/*
+ * Hardware allows filtering of events based on the originating
+ * CPU. Turn this off by setting filter bits to allow events from
+ * all CPUS, subunits and ID independent events in this cluster.
+ */
+static inline void cluster_pmu_set_evfilter_sys_mode(u32 ctr)
+{
+	u32 val =  L2PMXEVFILTER_SUFILTER_ALL |
+		   L2PMXEVFILTER_ORGFILTER_IDINDEP |
+		   L2PMXEVFILTER_ORGFILTER_ALL;
+
+	kryo_l2_set_indirect_reg(reg_idx(IA_L2PMXEVFILTER, ctr), val);
+}
+
+static inline u32 cluster_pmu_getreset_ovsr(void)
+{
+	u32 result = kryo_l2_get_indirect_reg(L2PMOVSSET);
+
+	kryo_l2_set_indirect_reg(L2PMOVSCLR, result);
+	return result;
+}
+
+static inline bool cluster_pmu_has_overflowed(u32 ovsr)
+{
+	return !!(ovsr & l2_counter_present_mask);
+}
+
+static inline bool cluster_pmu_counter_has_overflowed(u32 ovsr, u32 idx)
+{
+	return !!(ovsr & idx_to_reg_bit(idx));
+}
+
+static void l2_cache_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev, now;
+	u32 idx = hwc->idx;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = cluster_pmu_counter_get_value(idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	/*
+	 * The cycle counter is 64-bit, but all other counters are
+	 * 32-bit, and we must handle 32-bit overflow explicitly.
+	 */
+	delta = now - prev;
+	if (idx != l2_cycle_ctr_idx)
+		delta &= 0xffffffff;
+
+	local64_add(delta, &event->count);
+}
+
+static void l2_cache_cluster_set_period(struct cluster_pmu *cluster,
+				       struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u64 new;
+
+	/*
+	 * We limit the max period to half the max counter value so
+	 * that even in the case of extreme interrupt latency the
+	 * counter will (hopefully) not wrap past its initial value.
+	 */
+	if (idx == l2_cycle_ctr_idx)
+		new = L2_CYCLE_COUNTER_RELOAD;
+	else
+		new = L2_COUNTER_RELOAD;
+
+	local64_set(&hwc->prev_count, new);
+	cluster_pmu_counter_set_value(idx, new);
+}
+
+static int l2_cache_get_event_idx(struct cluster_pmu *cluster,
+				   struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int num_ctrs = cluster->l2cache_pmu->num_counters - 1;
+	unsigned int group;
+
+	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
+		if (test_and_set_bit(l2_cycle_ctr_idx, cluster->used_counters))
+			return -EAGAIN;
+
+		return l2_cycle_ctr_idx;
+	}
+
+	idx = find_first_zero_bit(cluster->used_counters, num_ctrs);
+	if (idx == num_ctrs)
+		/* The counters are all in use. */
+		return -EAGAIN;
+
+	/*
+	 * Check for column exclusion: event column already in use by another
+	 * event. This is for events which are not in the same group.
+	 * Conflicting events in the same group are detected in event_init.
+	 */
+	group = L2_EVT_GROUP(hwc->config_base);
+	if (test_bit(group, cluster->used_groups))
+		return -EAGAIN;
+
+	set_bit(idx, cluster->used_counters);
+	set_bit(group, cluster->used_groups);
+
+	return idx;
+}
+
+static void l2_cache_clear_event_idx(struct cluster_pmu *cluster,
+				      struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	clear_bit(idx, cluster->used_counters);
+	if (hwc->config_base != L2CYCLE_CTR_RAW_CODE)
+		clear_bit(L2_EVT_GROUP(hwc->config_base), cluster->used_groups);
+}
+
+static irqreturn_t l2_cache_handle_irq(int irq_num, void *data)
+{
+	struct cluster_pmu *cluster = data;
+	int num_counters = cluster->l2cache_pmu->num_counters;
+	u32 ovsr;
+	int idx;
+
+	ovsr = cluster_pmu_getreset_ovsr();
+	if (!cluster_pmu_has_overflowed(ovsr))
+		return IRQ_NONE;
+
+	for_each_set_bit(idx, cluster->used_counters, num_counters) {
+		struct perf_event *event = cluster->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (WARN_ON_ONCE(!event))
+			continue;
+
+		if (!cluster_pmu_counter_has_overflowed(ovsr, idx))
+			continue;
+
+		l2_cache_event_update(event);
+		hwc = &event->hw;
+
+		l2_cache_cluster_set_period(cluster, hwc);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Implementation of abstract pmu functionality required by
+ * the core perf events code.
+ */
+
+static void l2_cache_pmu_enable(struct pmu *pmu)
+{
+	/*
+	 * Although there is only one PMU (per socket) controlling multiple
+	 * physical PMUs (per cluster), because we do not support per-task mode
+	 * each event is associated with a CPU. Each event has pmu_enable
+	 * called on its CPU, so here it is only necessary to enable the
+	 * counters for the current CPU.
+	 */
+
+	cluster_pmu_enable();
+}
+
+static void l2_cache_pmu_disable(struct pmu *pmu)
+{
+	cluster_pmu_disable();
+}
+
+static int l2_cache_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cluster_pmu *cluster;
+	struct perf_event *sibling;
+	struct l2cache_pmu *l2cache_pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	l2cache_pmu = to_l2cache_pmu(event->pmu);
+
+	if (hwc->sample_period) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Sampling not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Per-task mode not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) ||
+	     ((event->attr.config & ~L2_EVT_MASK) != 0)) &&
+	    (event->attr.config != L2CYCLE_CTR_RAW_CODE)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Invalid config %llx\n",
+				    event->attr.config);
+		return -EINVAL;
+	}
+
+	/* Don't allow groups with mixed PMUs, except for s/w events */
+	if (event->group_leader->pmu != event->pmu &&
+	    !is_software_event(event->group_leader)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Can't create mixed PMU group\n");
+		return -EINVAL;
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu != event->pmu &&
+		    !is_software_event(sibling)) {
+			dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				 "Can't create mixed PMU group\n");
+			return -EINVAL;
+		}
+	}
+
+	cluster = get_cluster_pmu(l2cache_pmu, event->cpu);
+	if (!cluster) {
+		/* CPU has not been initialised */
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			"CPU%d not associated with L2 cluster\n", event->cpu);
+		return -EINVAL;
+	}
+
+	/* Ensure all events in a group are on the same cpu */
+	if ((event->group_leader != event) &&
+	    (cluster->on_cpu != event->group_leader->cpu)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Can't create group on CPUs %d and %d",
+			 event->cpu, event->group_leader->cpu);
+		return -EINVAL;
+	}
+
+	if ((event != event->group_leader) &&
+	    !is_software_event(event->group_leader) &&
+	    (L2_EVT_GROUP(event->group_leader->attr.config) ==
+	     L2_EVT_GROUP(event->attr.config))) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Column exclusion: conflicting events %llx %llx\n",
+		       event->group_leader->attr.config,
+		       event->attr.config);
+		return -EINVAL;
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if ((sibling != event) &&
+		    !is_software_event(sibling) &&
+		    (L2_EVT_GROUP(sibling->attr.config) ==
+		     L2_EVT_GROUP(event->attr.config))) {
+			dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			     "Column exclusion: conflicting events %llx %llx\n",
+					    sibling->attr.config,
+					    event->attr.config);
+			return -EINVAL;
+		}
+	}
+
+	hwc->idx = -1;
+	hwc->config_base = event->attr.config;
+
+	/*
+	 * Ensure all events are on the same cpu so all events are in the
+	 * same cpu context, to avoid races on pmu_enable etc.
+	 */
+	event->cpu = cluster->on_cpu;
+
+	return 0;
+}
+
+static void l2_cache_event_start(struct perf_event *event, int flags)
+{
+	struct cluster_pmu *cluster;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u32 config;
+	u32 event_cc, event_group;
+
+	hwc->state = 0;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	l2_cache_cluster_set_period(cluster, hwc);
+
+	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
+		cluster_pmu_set_evccntcr(0);
+	} else {
+		config = hwc->config_base;
+		event_cc    = L2_EVT_CODE(config);
+		event_group = L2_EVT_GROUP(config);
+
+		cluster_pmu_set_evcntcr(idx, 0);
+		cluster_pmu_set_evtyper(idx, event_group);
+		cluster_pmu_set_resr(cluster, event_group, event_cc);
+		cluster_pmu_set_evfilter_sys_mode(idx);
+	}
+
+	cluster_pmu_counter_enable_interrupt(idx);
+	cluster_pmu_counter_enable(idx);
+}
+
+static void l2_cache_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	cluster_pmu_counter_disable_interrupt(idx);
+	cluster_pmu_counter_disable(idx);
+
+	if (flags & PERF_EF_UPDATE)
+		l2_cache_event_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int l2_cache_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
+	struct cluster_pmu *cluster;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	idx = l2_cache_get_event_idx(cluster, event);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	cluster->events[idx] = event;
+	local64_set(&hwc->prev_count, 0);
+
+	if (flags & PERF_EF_START)
+		l2_cache_event_start(event, flags);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return err;
+}
+
+static void l2_cache_event_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cluster_pmu *cluster;
+	int idx = hwc->idx;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	l2_cache_event_stop(event, flags | PERF_EF_UPDATE);
+	cluster->events[idx] = NULL;
+	l2_cache_clear_event_idx(cluster, event);
+
+	perf_event_update_userpage(event);
+}
+
+static void l2_cache_event_read(struct perf_event *event)
+{
+	l2_cache_event_update(event);
+}
+
+static ssize_t l2_cache_pmu_cpumask_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &l2cache_pmu->cpumask);
+}
+
+static struct device_attribute l2_cache_pmu_cpumask_attr =
+		__ATTR(cpumask, S_IRUGO, l2_cache_pmu_cpumask_show, NULL);
+
+static struct attribute *l2_cache_pmu_cpumask_attrs[] = {
+	&l2_cache_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group l2_cache_pmu_cpumask_group = {
+	.attrs = l2_cache_pmu_cpumask_attrs,
+};
+
+/* CCG format for perf RAW codes. */
+PMU_FORMAT_ATTR(l2_code,   "config:4-11");
+PMU_FORMAT_ATTR(l2_group,  "config:0-3");
+PMU_FORMAT_ATTR(event,     "config:0-11");
+
+static struct attribute *l2_cache_pmu_formats[] = {
+	&format_attr_l2_code.attr,
+	&format_attr_l2_group.attr,
+	&format_attr_event.attr,
+	NULL,
+};
+
+static const struct attribute_group l2_cache_pmu_format_group = {
+	.name = "format",
+	.attrs = l2_cache_pmu_formats,
+};
+
+static ssize_t l2cache_pmu_event_show(struct device *dev,
+				      struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define L2CACHE_EVENT_ATTR(_name, _id)			    \
+	PMU_EVENT_ATTR_ID(_name, l2cache_pmu_event_show, _id)
+
+static struct attribute *l2_cache_pmu_events[] = {
+	L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLES),
+	L2CACHE_EVENT_ATTR(dcache-ops, L2_EVENT_DCACHE_OPS),
+	L2CACHE_EVENT_ATTR(icache-ops, L2_EVENT_ICACHE_OPS),
+	L2CACHE_EVENT_ATTR(tlbi, L2_EVENT_TLBI),
+	L2CACHE_EVENT_ATTR(barriers, L2_EVENT_BARRIERS),
+	L2CACHE_EVENT_ATTR(total-reads, L2_EVENT_TOTAL_READS),
+	L2CACHE_EVENT_ATTR(total-writes, L2_EVENT_TOTAL_WRITES),
+	L2CACHE_EVENT_ATTR(total-requests, L2_EVENT_TOTAL_REQUESTS),
+	L2CACHE_EVENT_ATTR(ldrex, L2_EVENT_LDREX),
+	L2CACHE_EVENT_ATTR(strex, L2_EVENT_STREX),
+	L2CACHE_EVENT_ATTR(clrex, L2_EVENT_CLREX),
+	NULL
+};
+
+static const struct attribute_group l2_cache_pmu_events_group = {
+	.name = "events",
+	.attrs = l2_cache_pmu_events,
+};
+
+static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
+	&l2_cache_pmu_format_group,
+	&l2_cache_pmu_cpumask_group,
+	&l2_cache_pmu_events_group,
+	NULL,
+};
+
+/*
+ * Generic device handlers
+ */
+
+static const struct acpi_device_id l2_cache_pmu_acpi_match[] = {
+	{ "QCOM8130", },
+	{ }
+};
+
+static int get_num_counters(void)
+{
+	int val;
+
+	val = kryo_l2_get_indirect_reg(L2PMCR);
+
+	/*
+	 * Read number of counters from L2PMCR and add 1
+	 * for the cycle counter.
+	 */
+	return ((val >> L2PMCR_NUM_EV_SHIFT) & L2PMCR_NUM_EV_MASK) + 1;
+}
+
+static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
+	struct l2cache_pmu *l2cache_pmu, int cpu)
+{
+	u64 mpidr;
+	int cpu_cluster_id;
+	struct cluster_pmu *cluster;
+
+	/*
+	 * This assumes that the cluster_id is in MPIDR[aff1] for
+	 * single-threaded cores, and MPIDR[aff2] for multi-threaded
+	 * cores. This logic will have to be updated if this changes.
+	 */
+	mpidr = read_cpuid_mpidr();
+	if (mpidr & MPIDR_MT_BITMASK)
+		cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+	else
+		cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
+		if (cluster->cluster_id != cpu_cluster_id)
+			continue;
+
+		dev_info(&l2cache_pmu->pdev->dev,
+			 "CPU%d associated with cluster %d\n", cpu,
+			 cluster->cluster_id);
+		cpumask_set_cpu(cpu, &cluster->cluster_cpus);
+		*per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
+		return cluster;
+	}
+
+	return NULL;
+}
+
+static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cluster_pmu *cluster;
+	struct l2cache_pmu *l2cache_pmu;
+
+	l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
+	cluster = get_cluster_pmu(l2cache_pmu, cpu);
+	if (!cluster) {
+		/* First time this CPU has come online */
+		cluster = l2_cache_associate_cpu_with_cluster(l2cache_pmu, cpu);
+		if (!cluster) {
+			/* Only if broken firmware doesn't list every cluster */
+			WARN_ONCE(1, "No L2 cache cluster for CPU%d\n", cpu);
+			return 0;
+		}
+	}
+
+	/* If another CPU is managing this cluster, we're done */
+	if (cluster->on_cpu != -1)
+		return 0;
+
+	/*
+	 * All CPUs on this cluster were down, use this one.
+	 * Reset to put it into sane state.
+	 */
+	cluster->on_cpu = cpu;
+	cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
+	cluster_pmu_reset();
+
+	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
+	enable_irq(cluster->irq);
+
+	return 0;
+}
+
+static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cluster_pmu *cluster;
+	struct l2cache_pmu *l2cache_pmu;
+	cpumask_t cluster_online_cpus;
+	unsigned int target;
+
+	l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
+	cluster = get_cluster_pmu(l2cache_pmu, cpu);
+	if (!cluster)
+		return 0;
+
+	/* If this CPU is not managing the cluster, we're done */
+	if (cluster->on_cpu != cpu)
+		return 0;
+
+	/* Give up ownership of cluster */
+	cpumask_clear_cpu(cpu, &l2cache_pmu->cpumask);
+	cluster->on_cpu = -1;
+
+	/* Any other CPU for this cluster which is still online */
+	cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
+		    cpu_online_mask);
+	target = cpumask_any_but(&cluster_online_cpus, cpu);
+	if (target >= nr_cpu_ids) {
+		disable_irq(cluster->irq);
+		return 0;
+	}
+
+	perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
+	cluster->on_cpu = target;
+	cpumask_set_cpu(target, &l2cache_pmu->cpumask);
+	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));
+
+	return 0;
+}
+
+static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev->parent);
+	struct platform_device *sdev = to_platform_device(dev);
+	struct l2cache_pmu *l2cache_pmu = data;
+	struct cluster_pmu *cluster;
+	u64 fw_cluster_id;
+	int err;
+	int irq;
+
+	err = acpi_dev_uid_to_integer(ACPI_COMPANION(dev), &fw_cluster_id);
+	if (err) {
+		dev_err(&pdev->dev, "unable to read ACPI uid\n");
+		return err;
+	}
+
+	cluster = devm_kzalloc(&pdev->dev, sizeof(*cluster), GFP_KERNEL);
+	if (!cluster)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&cluster->next);
+	cluster->cluster_id = fw_cluster_id;
+
+	irq = platform_get_irq(sdev, 0);
+	if (irq < 0)
+		return irq;
+	cluster->irq = irq;
+
+	cluster->l2cache_pmu = l2cache_pmu;
+	cluster->on_cpu = -1;
+
+	err = devm_request_irq(&pdev->dev, irq, l2_cache_handle_irq,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD |
+			       IRQF_NO_AUTOEN,
+			       "l2-cache-pmu", cluster);
+	if (err) {
+		dev_err(&pdev->dev,
+			"Unable to request IRQ%d for L2 PMU counters\n", irq);
+		return err;
+	}
+
+	dev_info(&pdev->dev,
+		 "Registered L2 cache PMU cluster %lld\n", fw_cluster_id);
+
+	spin_lock_init(&cluster->pmu_lock);
+
+	list_add(&cluster->next, &l2cache_pmu->clusters);
+	l2cache_pmu->num_pmus++;
+
+	return 0;
+}
+
+static int l2_cache_pmu_probe(struct platform_device *pdev)
+{
+	int err;
+	struct l2cache_pmu *l2cache_pmu;
+
+	l2cache_pmu =
+		devm_kzalloc(&pdev->dev, sizeof(*l2cache_pmu), GFP_KERNEL);
+	if (!l2cache_pmu)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&l2cache_pmu->clusters);
+
+	platform_set_drvdata(pdev, l2cache_pmu);
+	l2cache_pmu->pmu = (struct pmu) {
+		/* suffix is instance id for future use with multiple sockets */
+		.name		= "l2cache_0",
+		.task_ctx_nr    = perf_invalid_context,
+		.pmu_enable	= l2_cache_pmu_enable,
+		.pmu_disable	= l2_cache_pmu_disable,
+		.event_init	= l2_cache_event_init,
+		.add		= l2_cache_event_add,
+		.del		= l2_cache_event_del,
+		.start		= l2_cache_event_start,
+		.stop		= l2_cache_event_stop,
+		.read		= l2_cache_event_read,
+		.attr_groups	= l2_cache_pmu_attr_grps,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	l2cache_pmu->num_counters = get_num_counters();
+	l2cache_pmu->pdev = pdev;
+	l2cache_pmu->pmu_cluster = devm_alloc_percpu(&pdev->dev,
+						     struct cluster_pmu *);
+	if (!l2cache_pmu->pmu_cluster)
+		return -ENOMEM;
+
+	l2_cycle_ctr_idx = l2cache_pmu->num_counters - 1;
+	l2_counter_present_mask = GENMASK(l2cache_pmu->num_counters - 2, 0) |
+		BIT(L2CYCLE_CTR_BIT);
+
+	cpumask_clear(&l2cache_pmu->cpumask);
+
+	/* Read cluster info and initialize each cluster */
+	err = device_for_each_child(&pdev->dev, l2cache_pmu,
+				    l2_cache_pmu_probe_cluster);
+	if (err)
+		return err;
+
+	if (l2cache_pmu->num_pmus == 0) {
+		dev_err(&pdev->dev, "No hardware L2 cache PMUs found\n");
+		return -ENODEV;
+	}
+
+	err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				       &l2cache_pmu->node);
+	if (err) {
+		dev_err(&pdev->dev, "Error %d registering hotplug", err);
+		return err;
+	}
+
+	err = perf_pmu_register(&l2cache_pmu->pmu, l2cache_pmu->pmu.name, -1);
+	if (err) {
+		dev_err(&pdev->dev, "Error %d registering L2 cache PMU\n", err);
+		goto out_unregister;
+	}
+
+	dev_info(&pdev->dev, "Registered L2 cache PMU using %d HW PMUs\n",
+		 l2cache_pmu->num_pmus);
+
+	return err;
+
+out_unregister:
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				    &l2cache_pmu->node);
+	return err;
+}
+
+static int l2_cache_pmu_remove(struct platform_device *pdev)
+{
+	struct l2cache_pmu *l2cache_pmu =
+		to_l2cache_pmu(platform_get_drvdata(pdev));
+
+	perf_pmu_unregister(&l2cache_pmu->pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				    &l2cache_pmu->node);
+	return 0;
+}
+
+static struct platform_driver l2_cache_pmu_driver = {
+	.driver = {
+		.name = "qcom-l2cache-pmu",
+		.acpi_match_table = ACPI_PTR(l2_cache_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = l2_cache_pmu_probe,
+	.remove = l2_cache_pmu_remove,
+};
+
+static int __init register_l2_cache_pmu_driver(void)
+{
+	int err;
+
+	err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				      "AP_PERF_ARM_QCOM_L2_ONLINE",
+				      l2cache_pmu_online_cpu,
+				      l2cache_pmu_offline_cpu);
+	if (err)
+		return err;
+
+	return platform_driver_register(&l2_cache_pmu_driver);
+}
+device_initcall(register_l2_cache_pmu_driver);
diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
new file mode 100644
index 0000000000..2887edb4eb
--- /dev/null
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -0,0 +1,831 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for the L3 cache PMUs in Qualcomm Technologies chips.
+ *
+ * The driver supports a distributed cache architecture where the overall
+ * cache for a socket is comprised of multiple slices each with its own PMU.
+ * Access to each individual PMU is provided even though all CPUs share all
+ * the slices. User space needs to aggregate to individual counts to provide
+ * a global picture.
+ *
+ * See Documentation/admin-guide/perf/qcom_l3_pmu.rst for more details.
+ *
+ * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/*
+ * General constants
+ */
+
+/* Number of counters on each PMU */
+#define L3_NUM_COUNTERS  8
+/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
+#define L3_EVTYPE_MASK   0xFF
+/*
+ * Bit position of the 'long counter' flag within perf_event_attr.config.
+ * Reserve some space between the event type and this flag to allow expansion
+ * in the event type field.
+ */
+#define L3_EVENT_LC_BIT  32
+
+/*
+ * Register offsets
+ */
+
+/* Perfmon registers */
+#define L3_HML3_PM_CR       0x000
+#define L3_HML3_PM_EVCNTR(__cntr) (0x420 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_CNTCTL(__cntr) (0x120 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_EVTYPE(__cntr) (0x220 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_FILTRA   0x300
+#define L3_HML3_PM_FILTRB   0x308
+#define L3_HML3_PM_FILTRC   0x310
+#define L3_HML3_PM_FILTRAM  0x304
+#define L3_HML3_PM_FILTRBM  0x30C
+#define L3_HML3_PM_FILTRCM  0x314
+
+/* Basic counter registers */
+#define L3_M_BC_CR         0x500
+#define L3_M_BC_SATROLL_CR 0x504
+#define L3_M_BC_CNTENSET   0x508
+#define L3_M_BC_CNTENCLR   0x50C
+#define L3_M_BC_INTENSET   0x510
+#define L3_M_BC_INTENCLR   0x514
+#define L3_M_BC_GANG       0x718
+#define L3_M_BC_OVSR       0x740
+#define L3_M_BC_IRQCTL     0x96C
+
+/*
+ * Bit field definitions
+ */
+
+/* L3_HML3_PM_CR */
+#define PM_CR_RESET           (0)
+
+/* L3_HML3_PM_XCNTCTL/L3_HML3_PM_CNTCTLx */
+#define PMCNT_RESET           (0)
+
+/* L3_HML3_PM_EVTYPEx */
+#define EVSEL(__val)          ((__val) & L3_EVTYPE_MASK)
+
+/* Reset value for all the filter registers */
+#define PM_FLTR_RESET         (0)
+
+/* L3_M_BC_CR */
+#define BC_RESET              (1UL << 1)
+#define BC_ENABLE             (1UL << 0)
+
+/* L3_M_BC_SATROLL_CR */
+#define BC_SATROLL_CR_RESET   (0)
+
+/* L3_M_BC_CNTENSET */
+#define PMCNTENSET(__cntr)    (1UL << ((__cntr) & 0x7))
+
+/* L3_M_BC_CNTENCLR */
+#define PMCNTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
+#define BC_CNTENCLR_RESET     (0xFF)
+
+/* L3_M_BC_INTENSET */
+#define PMINTENSET(__cntr)    (1UL << ((__cntr) & 0x7))
+
+/* L3_M_BC_INTENCLR */
+#define PMINTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
+#define BC_INTENCLR_RESET     (0xFF)
+
+/* L3_M_BC_GANG */
+#define GANG_EN(__cntr)       (1UL << ((__cntr) & 0x7))
+#define BC_GANG_RESET         (0)
+
+/* L3_M_BC_OVSR */
+#define PMOVSRCLR(__cntr)     (1UL << ((__cntr) & 0x7))
+#define PMOVSRCLR_RESET       (0xFF)
+
+/* L3_M_BC_IRQCTL */
+#define PMIRQONMSBEN(__cntr)  (1UL << ((__cntr) & 0x7))
+#define BC_IRQCTL_RESET       (0x0)
+
+/*
+ * Events
+ */
+
+#define L3_EVENT_CYCLES		0x01
+#define L3_EVENT_READ_HIT		0x20
+#define L3_EVENT_READ_MISS		0x21
+#define L3_EVENT_READ_HIT_D		0x22
+#define L3_EVENT_READ_MISS_D		0x23
+#define L3_EVENT_WRITE_HIT		0x24
+#define L3_EVENT_WRITE_MISS		0x25
+
+/*
+ * Decoding of settings from perf_event_attr
+ *
+ * The config format for perf events is:
+ * - config: bits 0-7: event type
+ *           bit  32:  HW counter size requested, 0: 32 bits, 1: 64 bits
+ */
+
+static inline u32 get_event_type(struct perf_event *event)
+{
+	return (event->attr.config) & L3_EVTYPE_MASK;
+}
+
+static inline bool event_uses_long_counter(struct perf_event *event)
+{
+	return !!(event->attr.config & BIT_ULL(L3_EVENT_LC_BIT));
+}
+
+static inline int event_num_counters(struct perf_event *event)
+{
+	return event_uses_long_counter(event) ? 2 : 1;
+}
+
+/*
+ * Main PMU, inherits from the core perf PMU type
+ */
+struct l3cache_pmu {
+	struct pmu		pmu;
+	struct hlist_node	node;
+	void __iomem		*regs;
+	struct perf_event	*events[L3_NUM_COUNTERS];
+	unsigned long		used_mask[BITS_TO_LONGS(L3_NUM_COUNTERS)];
+	cpumask_t		cpumask;
+};
+
+#define to_l3cache_pmu(p) (container_of(p, struct l3cache_pmu, pmu))
+
+/*
+ * Type used to group hardware counter operations
+ *
+ * Used to implement two types of hardware counters, standard (32bits) and
+ * long (64bits). The hardware supports counter chaining which we use to
+ * implement long counters. This support is exposed via the 'lc' flag field
+ * in perf_event_attr.config.
+ */
+struct l3cache_event_ops {
+	/* Called to start event monitoring */
+	void (*start)(struct perf_event *event);
+	/* Called to stop event monitoring */
+	void (*stop)(struct perf_event *event, int flags);
+	/* Called to update the perf_event */
+	void (*update)(struct perf_event *event);
+};
+
+/*
+ * Implementation of long counter operations
+ *
+ * 64bit counters are implemented by chaining two of the 32bit physical
+ * counters. The PMU only supports chaining of adjacent even/odd pairs
+ * and for simplicity the driver always configures the odd counter to
+ * count the overflows of the lower-numbered even counter. Note that since
+ * the resulting hardware counter is 64bits no IRQs are required to maintain
+ * the software counter which is also 64bits.
+ */
+
+static void qcom_l3_cache__64bit_counter_start(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 evsel = get_event_type(event);
+	u32 gang;
+
+	/* Set the odd counter to count the overflows of the even counter */
+	gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
+	gang |= GANG_EN(idx + 1);
+	writel_relaxed(gang, l3pmu->regs + L3_M_BC_GANG);
+
+	/* Initialize the hardware counters and reset prev_count*/
+	local64_set(&event->hw.prev_count, 0);
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+
+	/*
+	 * Set the event types, the upper half must use zero and the lower
+	 * half the actual event type
+	 */
+	writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(idx + 1));
+	writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));
+
+	/* Finally, enable the counters */
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx + 1));
+	writel_relaxed(PMCNTENSET(idx + 1), l3pmu->regs + L3_M_BC_CNTENSET);
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
+	writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
+}
+
+static void qcom_l3_cache__64bit_counter_stop(struct perf_event *event,
+					      int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
+
+	/* Disable the counters */
+	writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
+	writel_relaxed(PMCNTENCLR(idx + 1), l3pmu->regs + L3_M_BC_CNTENCLR);
+
+	/* Disable chaining */
+	writel_relaxed(gang & ~GANG_EN(idx + 1), l3pmu->regs + L3_M_BC_GANG);
+}
+
+static void qcom_l3_cache__64bit_counter_update(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 hi, lo;
+	u64 prev, new;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		do {
+			hi = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
+			lo = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+		} while (hi != readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1)));
+		new = ((u64)hi << 32) | lo;
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	local64_add(new - prev, &event->count);
+}
+
+static const struct l3cache_event_ops event_ops_long = {
+	.start = qcom_l3_cache__64bit_counter_start,
+	.stop = qcom_l3_cache__64bit_counter_stop,
+	.update = qcom_l3_cache__64bit_counter_update,
+};
+
+/*
+ * Implementation of standard counter operations
+ *
+ * 32bit counters use a single physical counter and a hardware feature that
+ * asserts the overflow IRQ on the toggling of the most significant bit in
+ * the counter. This feature allows the counters to be left free-running
+ * without needing the usual reprogramming required to properly handle races
+ * during concurrent calls to update.
+ */
+
+static void qcom_l3_cache__32bit_counter_start(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 evsel = get_event_type(event);
+	u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Set the counter to assert the overflow IRQ on MSB toggling */
+	writel_relaxed(irqctl | PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Initialize the hardware counter and reset prev_count*/
+	local64_set(&event->hw.prev_count, 0);
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+
+	/* Set the event type */
+	writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));
+
+	/* Enable interrupt generation by this counter */
+	writel_relaxed(PMINTENSET(idx), l3pmu->regs + L3_M_BC_INTENSET);
+
+	/* Finally, enable the counter */
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
+	writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
+}
+
+static void qcom_l3_cache__32bit_counter_stop(struct perf_event *event,
+					      int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Disable the counter */
+	writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
+
+	/* Disable interrupt generation by this counter */
+	writel_relaxed(PMINTENCLR(idx), l3pmu->regs + L3_M_BC_INTENCLR);
+
+	/* Set the counter to not assert the overflow IRQ on MSB toggling */
+	writel_relaxed(irqctl & ~PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
+}
+
+static void qcom_l3_cache__32bit_counter_update(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 prev, new;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		new = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	local64_add(new - prev, &event->count);
+}
+
+static const struct l3cache_event_ops event_ops_std = {
+	.start = qcom_l3_cache__32bit_counter_start,
+	.stop = qcom_l3_cache__32bit_counter_stop,
+	.update = qcom_l3_cache__32bit_counter_update,
+};
+
+/* Retrieve the appropriate operations for the given event */
+static
+const struct l3cache_event_ops *l3cache_event_get_ops(struct perf_event *event)
+{
+	if (event_uses_long_counter(event))
+		return &event_ops_long;
+	else
+		return &event_ops_std;
+}
+
+/*
+ * Top level PMU functions.
+ */
+
+static inline void qcom_l3_cache__init(struct l3cache_pmu *l3pmu)
+{
+	int i;
+
+	writel_relaxed(BC_RESET, l3pmu->regs + L3_M_BC_CR);
+
+	/*
+	 * Use writel for the first programming command to ensure the basic
+	 * counter unit is stopped before proceeding
+	 */
+	writel(BC_SATROLL_CR_RESET, l3pmu->regs + L3_M_BC_SATROLL_CR);
+
+	writel_relaxed(BC_CNTENCLR_RESET, l3pmu->regs + L3_M_BC_CNTENCLR);
+	writel_relaxed(BC_INTENCLR_RESET, l3pmu->regs + L3_M_BC_INTENCLR);
+	writel_relaxed(PMOVSRCLR_RESET, l3pmu->regs + L3_M_BC_OVSR);
+	writel_relaxed(BC_GANG_RESET, l3pmu->regs + L3_M_BC_GANG);
+	writel_relaxed(BC_IRQCTL_RESET, l3pmu->regs + L3_M_BC_IRQCTL);
+	writel_relaxed(PM_CR_RESET, l3pmu->regs + L3_HML3_PM_CR);
+
+	for (i = 0; i < L3_NUM_COUNTERS; ++i) {
+		writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(i));
+		writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(i));
+	}
+
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRA);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRAM);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRB);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRBM);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRC);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRCM);
+
+	/*
+	 * Use writel here to ensure all programming commands are done
+	 *  before proceeding
+	 */
+	writel(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
+}
+
+static irqreturn_t qcom_l3_cache__handle_irq(int irq_num, void *data)
+{
+	struct l3cache_pmu *l3pmu = data;
+	/* Read the overflow status register */
+	long status = readl_relaxed(l3pmu->regs + L3_M_BC_OVSR);
+	int idx;
+
+	if (status == 0)
+		return IRQ_NONE;
+
+	/* Clear the bits we read on the overflow status register */
+	writel_relaxed(status, l3pmu->regs + L3_M_BC_OVSR);
+
+	for_each_set_bit(idx, &status, L3_NUM_COUNTERS) {
+		struct perf_event *event;
+		const struct l3cache_event_ops *ops;
+
+		event = l3pmu->events[idx];
+		if (!event)
+			continue;
+
+		/*
+		 * Since the IRQ is not enabled for events using long counters
+		 * we should never see one of those here, however, be consistent
+		 * and use the ops indirections like in the other operations.
+		 */
+
+		ops = l3cache_event_get_ops(event);
+		ops->update(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Implementation of abstract pmu functionality required by
+ * the core perf events code.
+ */
+
+static void qcom_l3_cache__pmu_enable(struct pmu *pmu)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);
+
+	/* Ensure the other programming commands are observed before enabling */
+	wmb();
+
+	writel_relaxed(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
+}
+
+static void qcom_l3_cache__pmu_disable(struct pmu *pmu)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);
+
+	writel_relaxed(0, l3pmu->regs + L3_M_BC_CR);
+
+	/* Ensure the basic counter unit is stopped before proceeding */
+	wmb();
+}
+
+/*
+ * We must NOT create groups containing events from multiple hardware PMUs,
+ * although mixing different software and hardware PMUs is allowed.
+ */
+static bool qcom_l3_cache__validate_event_group(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *sibling;
+	int counters = 0;
+
+	if (leader->pmu != event->pmu && !is_software_event(leader))
+		return false;
+
+	counters = event_num_counters(event);
+	counters += event_num_counters(leader);
+
+	for_each_sibling_event(sibling, leader) {
+		if (is_software_event(sibling))
+			continue;
+		if (sibling->pmu != event->pmu)
+			return false;
+		counters += event_num_counters(sibling);
+	}
+
+	/*
+	 * If the group requires more counters than the HW has, it
+	 * cannot ever be scheduled.
+	 */
+	return counters <= L3_NUM_COUNTERS;
+}
+
+static int qcom_l3_cache__event_init(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * Is the event for this PMU?
+	 */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * Sampling not supported since these events are not core-attributable.
+	 */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * Task mode not available, we run the counters as socket counters,
+	 * not attributable to any CPU and therefore cannot attribute per-task.
+	 */
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* Validate the group */
+	if (!qcom_l3_cache__validate_event_group(event))
+		return -EINVAL;
+
+	hwc->idx = -1;
+
+	/*
+	 * Many perf core operations (eg. events rotation) operate on a
+	 * single CPU context. This is obvious for CPU PMUs, where one
+	 * expects the same sets of events being observed on all CPUs,
+	 * but can lead to issues for off-core PMUs, like this one, where
+	 * each event could be theoretically assigned to a different CPU.
+	 * To mitigate this, we enforce CPU assignment to one designated
+	 * processor (the one described in the "cpumask" attribute exported
+	 * by the PMU device). perf user space tools honor this and avoid
+	 * opening more than one copy of the events.
+	 */
+	event->cpu = cpumask_first(&l3pmu->cpumask);
+
+	return 0;
+}
+
+static void qcom_l3_cache__event_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	hwc->state = 0;
+	ops->start(event);
+}
+
+static void qcom_l3_cache__event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	ops->stop(event, flags);
+	if (flags & PERF_EF_UPDATE)
+		ops->update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int qcom_l3_cache__event_add(struct perf_event *event, int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int order = event_uses_long_counter(event) ? 1 : 0;
+	int idx;
+
+	/*
+	 * Try to allocate a counter.
+	 */
+	idx = bitmap_find_free_region(l3pmu->used_mask, L3_NUM_COUNTERS, order);
+	if (idx < 0)
+		/* The counters are all in use. */
+		return -EAGAIN;
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	l3pmu->events[idx] = event;
+
+	if (flags & PERF_EF_START)
+		qcom_l3_cache__event_start(event, 0);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void qcom_l3_cache__event_del(struct perf_event *event, int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int order = event_uses_long_counter(event) ? 1 : 0;
+
+	/* Stop and clean up */
+	qcom_l3_cache__event_stop(event,  flags | PERF_EF_UPDATE);
+	l3pmu->events[hwc->idx] = NULL;
+	bitmap_release_region(l3pmu->used_mask, hwc->idx, order);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+}
+
+static void qcom_l3_cache__event_read(struct perf_event *event)
+{
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	ops->update(event);
+}
+
+/*
+ * Add sysfs attributes
+ *
+ * We export:
+ * - formats, used by perf user space and other tools to configure events
+ * - events, used by perf user space and other tools to create events
+ *   symbolically, e.g.:
+ *     perf stat -a -e l3cache_0_0/event=read-miss/ ls
+ *     perf stat -a -e l3cache_0_0/event=0x21/ ls
+ * - cpumask, used by perf user space and other tools to know on which CPUs
+ *   to open the events
+ */
+
+/* formats */
+
+static ssize_t l3cache_pmu_format_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "%s\n", (char *) eattr->var);
+}
+
+#define L3CACHE_PMU_FORMAT_ATTR(_name, _config)				      \
+	(&((struct dev_ext_attribute[]) {				      \
+		{ .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \
+		  .var = (void *) _config, }				      \
+	})[0].attr.attr)
+
+static struct attribute *qcom_l3_cache_pmu_formats[] = {
+	L3CACHE_PMU_FORMAT_ATTR(event, "config:0-7"),
+	L3CACHE_PMU_FORMAT_ATTR(lc, "config:" __stringify(L3_EVENT_LC_BIT)),
+	NULL,
+};
+
+static const struct attribute_group qcom_l3_cache_pmu_format_group = {
+	.name = "format",
+	.attrs = qcom_l3_cache_pmu_formats,
+};
+
+/* events */
+
+static ssize_t l3cache_pmu_event_show(struct device *dev,
+				     struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define L3CACHE_EVENT_ATTR(_name, _id)					     \
+	PMU_EVENT_ATTR_ID(_name, l3cache_pmu_event_show, _id)
+
+static struct attribute *qcom_l3_cache_pmu_events[] = {
+	L3CACHE_EVENT_ATTR(cycles, L3_EVENT_CYCLES),
+	L3CACHE_EVENT_ATTR(read-hit, L3_EVENT_READ_HIT),
+	L3CACHE_EVENT_ATTR(read-miss, L3_EVENT_READ_MISS),
+	L3CACHE_EVENT_ATTR(read-hit-d-side, L3_EVENT_READ_HIT_D),
+	L3CACHE_EVENT_ATTR(read-miss-d-side, L3_EVENT_READ_MISS_D),
+	L3CACHE_EVENT_ATTR(write-hit, L3_EVENT_WRITE_HIT),
+	L3CACHE_EVENT_ATTR(write-miss, L3_EVENT_WRITE_MISS),
+	NULL
+};
+
+static const struct attribute_group qcom_l3_cache_pmu_events_group = {
+	.name = "events",
+	.attrs = qcom_l3_cache_pmu_events,
+};
+
+/* cpumask */
+
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &l3pmu->cpumask);
+}
+
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
+	.attrs = qcom_l3_cache_pmu_cpumask_attrs,
+};
+
+/*
+ * Per PMU device attribute groups
+ */
+static const struct attribute_group *qcom_l3_cache_pmu_attr_grps[] = {
+	&qcom_l3_cache_pmu_format_group,
+	&qcom_l3_cache_pmu_events_group,
+	&qcom_l3_cache_pmu_cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * Probing functions and data.
+ */
+
+static int qcom_l3_cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);
+
+	/* If there is not a CPU/PMU association pick this CPU */
+	if (cpumask_empty(&l3pmu->cpumask))
+		cpumask_set_cpu(cpu, &l3pmu->cpumask);
+
+	return 0;
+}
+
+static int qcom_l3_cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &l3pmu->cpumask))
+		return 0;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+	perf_pmu_migrate_context(&l3pmu->pmu, cpu, target);
+	cpumask_set_cpu(target, &l3pmu->cpumask);
+	return 0;
+}
+
+static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
+{
+	struct l3cache_pmu *l3pmu;
+	struct acpi_device *acpi_dev;
+	struct resource *memrc;
+	int ret;
+	char *name;
+
+	/* Initialize the PMU data structures */
+
+	acpi_dev = ACPI_COMPANION(&pdev->dev);
+	if (!acpi_dev)
+		return -ENODEV;
+
+	l3pmu = devm_kzalloc(&pdev->dev, sizeof(*l3pmu), GFP_KERNEL);
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "l3cache_%s_%s",
+		      acpi_dev_parent(acpi_dev)->pnp.unique_id,
+		      acpi_dev->pnp.unique_id);
+	if (!l3pmu || !name)
+		return -ENOMEM;
+
+	l3pmu->pmu = (struct pmu) {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.pmu_enable	= qcom_l3_cache__pmu_enable,
+		.pmu_disable	= qcom_l3_cache__pmu_disable,
+		.event_init	= qcom_l3_cache__event_init,
+		.add		= qcom_l3_cache__event_add,
+		.del		= qcom_l3_cache__event_del,
+		.start		= qcom_l3_cache__event_start,
+		.stop		= qcom_l3_cache__event_stop,
+		.read		= qcom_l3_cache__event_read,
+
+		.attr_groups	= qcom_l3_cache_pmu_attr_grps,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	l3pmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc);
+	if (IS_ERR(l3pmu->regs))
+		return PTR_ERR(l3pmu->regs);
+
+	qcom_l3_cache__init(l3pmu);
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret <= 0)
+		return ret;
+
+	ret = devm_request_irq(&pdev->dev, ret, qcom_l3_cache__handle_irq, 0,
+			       name, l3pmu);
+	if (ret) {
+		dev_err(&pdev->dev, "Request for IRQ failed for slice @%pa\n",
+			&memrc->start);
+		return ret;
+	}
+
+	/* Add this instance to the list used by the offline callback */
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, &l3pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug", ret);
+		return ret;
+	}
+
+	ret = perf_pmu_register(&l3pmu->pmu, name, -1);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register L3 cache PMU (%d)\n", ret);
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "Registered %s, type: %d\n", name, l3pmu->pmu.type);
+
+	return 0;
+}
+
+static const struct acpi_device_id qcom_l3_cache_pmu_acpi_match[] = {
+	{ "QCOM8081", },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, qcom_l3_cache_pmu_acpi_match);
+
+static struct platform_driver qcom_l3_cache_pmu_driver = {
+	.driver = {
+		.name = "qcom-l3cache-pmu",
+		.acpi_match_table = ACPI_PTR(qcom_l3_cache_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = qcom_l3_cache_pmu_probe,
+};
+
+static int __init register_qcom_l3_cache_pmu_driver(void)
+{
+	int ret;
+
+	/* Install a hook to update the reader CPU in case it goes offline */
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
+				      "perf/qcom/l3cache:online",
+				      qcom_l3_cache_pmu_online_cpu,
+				      qcom_l3_cache_pmu_offline_cpu);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&qcom_l3_cache_pmu_driver);
+}
+device_initcall(register_qcom_l3_cache_pmu_driver);
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
new file mode 100644
index 0000000000..0dda70e1ef
--- /dev/null
+++ b/drivers/perf/riscv_pmu.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V performance counter support.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This implementation is based on old RISC-V perf and ARM perf event code
+ * which are in turn based on sparc64 and x86 code.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+#include <linux/sched_clock.h>
+
+#include <asm/sbi.h>
+
+static bool riscv_perf_user_access(struct perf_event *event)
+{
+	return ((event->attr.type == PERF_TYPE_HARDWARE) ||
+		(event->attr.type == PERF_TYPE_HW_CACHE) ||
+		(event->attr.type == PERF_TYPE_RAW)) &&
+		!!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) &&
+		(event->hw.idx != -1);
+}
+
+void arch_perf_update_userpage(struct perf_event *event,
+			       struct perf_event_mmap_page *userpg, u64 now)
+{
+	struct clock_read_data *rd;
+	unsigned int seq;
+	u64 ns;
+
+	userpg->cap_user_time = 0;
+	userpg->cap_user_time_zero = 0;
+	userpg->cap_user_time_short = 0;
+	userpg->cap_user_rdpmc = riscv_perf_user_access(event);
+
+#ifdef CONFIG_RISCV_PMU
+	/*
+	 * The counters are 64-bit but the priv spec doesn't mandate all the
+	 * bits to be implemented: that's why, counter width can vary based on
+	 * the cpu vendor.
+	 */
+	if (userpg->cap_user_rdpmc)
+		userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;
+#endif
+
+	do {
+		rd = sched_clock_read_begin(&seq);
+
+		userpg->time_mult = rd->mult;
+		userpg->time_shift = rd->shift;
+		userpg->time_zero = rd->epoch_ns;
+		userpg->time_cycles = rd->epoch_cyc;
+		userpg->time_mask = rd->sched_clock_mask;
+
+		/*
+		 * Subtract the cycle base, such that software that
+		 * doesn't know about cap_user_time_short still 'works'
+		 * assuming no wraps.
+		 */
+		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+		userpg->time_zero -= ns;
+
+	} while (sched_clock_read_retry(seq));
+
+	userpg->time_offset = userpg->time_zero - now;
+
+	/*
+	 * time_shift is not expected to be greater than 31 due to
+	 * the original published conversion algorithm shifting a
+	 * 32-bit value (now specifies a 64-bit value) - refer
+	 * perf_event_mmap_page documentation in perf_event.h.
+	 */
+	if (userpg->time_shift == 32) {
+		userpg->time_shift = 31;
+		userpg->time_mult >>= 1;
+	}
+
+	/*
+	 * Internal timekeeping for enabled/running/stopped times
+	 * is always computed with the sched_clock.
+	 */
+	userpg->cap_user_time = 1;
+	userpg->cap_user_time_zero = 1;
+	userpg->cap_user_time_short = 1;
+}
+
+static unsigned long csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val)		{\
+	case __csr_num:					\
+		__val = csr_read(__csr_num);		\
+		break; }
+#define switchcase_csr_read_2(__csr_num, __val)		{\
+	switchcase_csr_read(__csr_num + 0, __val)	 \
+	switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val)		{\
+	switchcase_csr_read_2(__csr_num + 0, __val)	 \
+	switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val)		{\
+	switchcase_csr_read_4(__csr_num + 0, __val)	 \
+	switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val)	{\
+	switchcase_csr_read_8(__csr_num + 0, __val)	 \
+	switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val)	{\
+	switchcase_csr_read_16(__csr_num + 0, __val)	 \
+	switchcase_csr_read_16(__csr_num + 16, __val)}
+
+	unsigned long ret = 0;
+
+	switch (csr_num) {
+	switchcase_csr_read_32(CSR_CYCLE, ret)
+	switchcase_csr_read_32(CSR_CYCLEH, ret)
+	default :
+		break;
+	}
+
+	return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+/*
+ * Read the CSR of a corresponding counter.
+ */
+unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
+{
+	if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
+	   (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
+		pr_err("Invalid performance counter csr %lx\n", csr);
+		return -EINVAL;
+	}
+
+	return csr_read_num(csr);
+}
+
+u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
+{
+	int cwidth;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!rvpmu->ctr_get_width)
+	/**
+	 * If the pmu driver doesn't support counter width, set it to default
+	 * maximum allowed by the specification.
+	 */
+		cwidth = 63;
+	else {
+		if (hwc->idx == -1)
+			/* Handle init case where idx is not initialized yet */
+			cwidth = rvpmu->ctr_get_width(0);
+		else
+			cwidth = rvpmu->ctr_get_width(hwc->idx);
+	}
+
+	return GENMASK_ULL(cwidth, 0);
+}
+
+u64 riscv_pmu_event_update(struct perf_event *event)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_raw_count, new_raw_count;
+	unsigned long cmask;
+	u64 oldval, delta;
+
+	if (!rvpmu->ctr_read)
+		return 0;
+
+	cmask = riscv_pmu_ctr_get_width_mask(event);
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = rvpmu->ctr_read(event);
+		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					 new_raw_count);
+	} while (oldval != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & cmask;
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return delta;
+}
+
+void riscv_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		if (rvpmu->ctr_stop) {
+			rvpmu->ctr_stop(event, 0);
+			hwc->state |= PERF_HES_STOPPED;
+		}
+		riscv_pmu_event_update(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+int riscv_pmu_event_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int overflow = 0;
+	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (max_period >> 1))
+		left = (max_period >> 1);
+
+	local64_set(&hwc->prev_count, (u64)-left);
+
+	perf_event_update_userpage(event);
+
+	return overflow;
+}
+
+void riscv_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
+	u64 init_val;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	riscv_pmu_event_set_period(event);
+	init_val = local64_read(&hwc->prev_count) & max_period;
+	rvpmu->ctr_start(event, init_val);
+	perf_event_update_userpage(event);
+}
+
+static int riscv_pmu_add(struct perf_event *event, int flags)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	idx = rvpmu->ctr_get_idx(event);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	cpuc->events[idx] = event;
+	cpuc->n_events++;
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (flags & PERF_EF_START)
+		riscv_pmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void riscv_pmu_del(struct perf_event *event, int flags)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	riscv_pmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[hwc->idx] = NULL;
+	/* The firmware need to reset the counter mapping */
+	if (rvpmu->ctr_stop)
+		rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
+	cpuc->n_events--;
+	if (rvpmu->ctr_clear_idx)
+		rvpmu->ctr_clear_idx(event);
+	perf_event_update_userpage(event);
+	hwc->idx = -1;
+}
+
+static void riscv_pmu_read(struct perf_event *event)
+{
+	riscv_pmu_event_update(event);
+}
+
+static int riscv_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	int mapped_event;
+	u64 event_config = 0;
+	uint64_t cmask;
+
+	hwc->flags = 0;
+	mapped_event = rvpmu->event_map(event, &event_config);
+	if (mapped_event < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapped_event;
+	}
+
+	/*
+	 * idx is set to -1 because the index of a general event should not be
+	 * decided until binding to some counter in pmu->add().
+	 * config will contain the information about counter CSR
+	 * the idx will contain the counter index
+	 */
+	hwc->config = event_config;
+	hwc->idx = -1;
+	hwc->event_base = mapped_event;
+
+	if (rvpmu->event_init)
+		rvpmu->event_init(event);
+
+	if (!is_sampling_event(event)) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		cmask = riscv_pmu_ctr_get_width_mask(event);
+		hwc->sample_period  =  cmask >> 1;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	return 0;
+}
+
+static int riscv_pmu_event_idx(struct perf_event *event)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+	if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+		return 0;
+
+	if (rvpmu->csr_index)
+		return rvpmu->csr_index(event) + 1;
+
+	return 0;
+}
+
+static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+	if (rvpmu->event_mapped) {
+		rvpmu->event_mapped(event, mm);
+		perf_event_update_userpage(event);
+	}
+}
+
+static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+	if (rvpmu->event_unmapped) {
+		rvpmu->event_unmapped(event, mm);
+		perf_event_update_userpage(event);
+	}
+}
+
+struct riscv_pmu *riscv_pmu_alloc(void)
+{
+	struct riscv_pmu *pmu;
+	int cpuid, i;
+	struct cpu_hw_events *cpuc;
+
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		goto out;
+
+	pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
+	if (!pmu->hw_events) {
+		pr_info("failed to allocate per-cpu PMU data.\n");
+		goto out_free_pmu;
+	}
+
+	for_each_possible_cpu(cpuid) {
+		cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
+		cpuc->n_events = 0;
+		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
+			cpuc->events[i] = NULL;
+	}
+	pmu->pmu = (struct pmu) {
+		.event_init	= riscv_pmu_event_init,
+		.event_mapped	= riscv_pmu_event_mapped,
+		.event_unmapped	= riscv_pmu_event_unmapped,
+		.event_idx	= riscv_pmu_event_idx,
+		.add		= riscv_pmu_add,
+		.del		= riscv_pmu_del,
+		.start		= riscv_pmu_start,
+		.stop		= riscv_pmu_stop,
+		.read		= riscv_pmu_read,
+	};
+
+	return pmu;
+
+out_free_pmu:
+	kfree(pmu);
+out:
+	return NULL;
+}
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
new file mode 100644
index 0000000000..79fdd66792
--- /dev/null
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V performance counter support.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This implementation is based on old RISC-V perf and ARM perf event code
+ * which are in turn based on sparc64 and x86 code.
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/platform_device.h>
+
+#define RISCV_PMU_LEGACY_CYCLE		0
+#define RISCV_PMU_LEGACY_INSTRET	2
+
+static bool pmu_init_done;
+
+static int pmu_legacy_ctr_get_idx(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+
+	if (event->attr.type != PERF_TYPE_HARDWARE)
+		return -EOPNOTSUPP;
+	if (attr->config == PERF_COUNT_HW_CPU_CYCLES)
+		return RISCV_PMU_LEGACY_CYCLE;
+	else if (attr->config == PERF_COUNT_HW_INSTRUCTIONS)
+		return RISCV_PMU_LEGACY_INSTRET;
+	else
+		return -EOPNOTSUPP;
+}
+
+/* For legacy config & counter index are same */
+static int pmu_legacy_event_map(struct perf_event *event, u64 *config)
+{
+	return pmu_legacy_ctr_get_idx(event);
+}
+
+static u64 pmu_legacy_read_ctr(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u64 val;
+
+	if (idx == RISCV_PMU_LEGACY_CYCLE) {
+		val = riscv_pmu_ctr_read_csr(CSR_CYCLE);
+		if (IS_ENABLED(CONFIG_32BIT))
+			val = (u64)riscv_pmu_ctr_read_csr(CSR_CYCLEH) << 32 | val;
+	} else if (idx == RISCV_PMU_LEGACY_INSTRET) {
+		val = riscv_pmu_ctr_read_csr(CSR_INSTRET);
+		if (IS_ENABLED(CONFIG_32BIT))
+			val = ((u64)riscv_pmu_ctr_read_csr(CSR_INSTRETH)) << 32 | val;
+	} else
+		return 0;
+
+	return val;
+}
+
+static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 initial_val = pmu_legacy_read_ctr(event);
+
+	/**
+	 * The legacy method doesn't really have a start/stop method.
+	 * It also can not update the counter with a initial value.
+	 * But we still need to set the prev_count so that read() can compute
+	 * the delta. Just use the current counter value to set the prev_count.
+	 */
+	local64_set(&hwc->prev_count, initial_val);
+}
+
+static uint8_t pmu_legacy_csr_index(struct perf_event *event)
+{
+	return event->hw.idx;
+}
+
+static void pmu_legacy_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+	if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+	    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+		return;
+
+	event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
+static void pmu_legacy_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+	if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+	    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+		return;
+
+	event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
+/*
+ * This is just a simple implementation to allow legacy implementations
+ * compatible with new RISC-V PMU driver framework.
+ * This driver only allows reading two counters i.e CYCLE & INSTRET.
+ * However, it can not start or stop the counter. Thus, it is not very useful
+ * will be removed in future.
+ */
+static void pmu_legacy_init(struct riscv_pmu *pmu)
+{
+	pr_info("Legacy PMU implementation is available\n");
+
+	pmu->cmask = BIT(RISCV_PMU_LEGACY_CYCLE) |
+		BIT(RISCV_PMU_LEGACY_INSTRET);
+	pmu->ctr_start = pmu_legacy_ctr_start;
+	pmu->ctr_stop = NULL;
+	pmu->event_map = pmu_legacy_event_map;
+	pmu->ctr_get_idx = pmu_legacy_ctr_get_idx;
+	pmu->ctr_get_width = NULL;
+	pmu->ctr_clear_idx = NULL;
+	pmu->ctr_read = pmu_legacy_read_ctr;
+	pmu->event_mapped = pmu_legacy_event_mapped;
+	pmu->event_unmapped = pmu_legacy_event_unmapped;
+	pmu->csr_index = pmu_legacy_csr_index;
+
+	perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
+}
+
+static int pmu_legacy_device_probe(struct platform_device *pdev)
+{
+	struct riscv_pmu *pmu = NULL;
+
+	pmu = riscv_pmu_alloc();
+	if (!pmu)
+		return -ENOMEM;
+	pmu_legacy_init(pmu);
+
+	return 0;
+}
+
+static struct platform_driver pmu_legacy_driver = {
+	.probe		= pmu_legacy_device_probe,
+	.driver		= {
+		.name	= RISCV_PMU_LEGACY_PDEV_NAME,
+	},
+};
+
+static int __init riscv_pmu_legacy_devinit(void)
+{
+	int ret;
+	struct platform_device *pdev;
+
+	if (likely(pmu_init_done))
+		return 0;
+
+	ret = platform_driver_register(&pmu_legacy_driver);
+	if (ret)
+		return ret;
+
+	pdev = platform_device_register_simple(RISCV_PMU_LEGACY_PDEV_NAME, -1, NULL, 0);
+	if (IS_ERR(pdev)) {
+		platform_driver_unregister(&pmu_legacy_driver);
+		return PTR_ERR(pdev);
+	}
+
+	return ret;
+}
+late_initcall(riscv_pmu_legacy_devinit);
+
+void riscv_pmu_legacy_skip_init(void)
+{
+	pmu_init_done = true;
+}
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
new file mode 100644
index 0000000000..cd8a2b9efd
--- /dev/null
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -0,0 +1,1135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V performance counter support.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This code is based on ARM perf event code which is in turn based on
+ * sparc64 and x86 code.
+ */
+
+#define pr_fmt(fmt) "riscv-pmu-sbi: " fmt
+
+#include <linux/mod_devicetable.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+#include <linux/of.h>
+#include <linux/cpu_pm.h>
+#include <linux/sched/clock.h>
+
+#include <asm/errata_list.h>
+#include <asm/sbi.h>
+#include <asm/hwcap.h>
+
+#define SYSCTL_NO_USER_ACCESS	0
+#define SYSCTL_USER_ACCESS	1
+#define SYSCTL_LEGACY		2
+
+#define PERF_EVENT_FLAG_NO_USER_ACCESS	BIT(SYSCTL_NO_USER_ACCESS)
+#define PERF_EVENT_FLAG_USER_ACCESS	BIT(SYSCTL_USER_ACCESS)
+#define PERF_EVENT_FLAG_LEGACY		BIT(SYSCTL_LEGACY)
+
+PMU_FORMAT_ATTR(event, "config:0-47");
+PMU_FORMAT_ATTR(firmware, "config:63");
+
+static struct attribute *riscv_arch_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_firmware.attr,
+	NULL,
+};
+
+static struct attribute_group riscv_pmu_format_group = {
+	.name = "format",
+	.attrs = riscv_arch_formats_attr,
+};
+
+static const struct attribute_group *riscv_pmu_attr_groups[] = {
+	&riscv_pmu_format_group,
+	NULL,
+};
+
+/* Allow user mode access by default */
+static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;
+
+/*
+ * RISC-V doesn't have heterogeneous harts yet. This need to be part of
+ * per_cpu in case of harts with different pmu counters
+ */
+static union sbi_pmu_ctr_info *pmu_ctr_list;
+static bool riscv_pmu_use_irq;
+static unsigned int riscv_pmu_irq_num;
+static unsigned int riscv_pmu_irq;
+
+/* Cache the available counters in a bitmask */
+static unsigned long cmask;
+
+struct sbi_pmu_event_data {
+	union {
+		union {
+			struct hw_gen_event {
+				uint32_t event_code:16;
+				uint32_t event_type:4;
+				uint32_t reserved:12;
+			} hw_gen_event;
+			struct hw_cache_event {
+				uint32_t result_id:1;
+				uint32_t op_id:2;
+				uint32_t cache_id:13;
+				uint32_t event_type:4;
+				uint32_t reserved:12;
+			} hw_cache_event;
+		};
+		uint32_t event_idx;
+	};
+};
+
+static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= {.hw_gen_event = {
+							SBI_PMU_HW_CPU_CYCLES,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_INSTRUCTIONS]		= {.hw_gen_event = {
+							SBI_PMU_HW_INSTRUCTIONS,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= {.hw_gen_event = {
+							SBI_PMU_HW_CACHE_REFERENCES,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_CACHE_MISSES]		= {.hw_gen_event = {
+							SBI_PMU_HW_CACHE_MISSES,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= {.hw_gen_event = {
+							SBI_PMU_HW_BRANCH_INSTRUCTIONS,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_BRANCH_MISSES]		= {.hw_gen_event = {
+							SBI_PMU_HW_BRANCH_MISSES,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_BUS_CYCLES]		= {.hw_gen_event = {
+							SBI_PMU_HW_BUS_CYCLES,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= {.hw_gen_event = {
+							SBI_PMU_HW_STALLED_CYCLES_FRONTEND,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= {.hw_gen_event = {
+							SBI_PMU_HW_STALLED_CYCLES_BACKEND,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+	[PERF_COUNT_HW_REF_CPU_CYCLES]		= {.hw_gen_event = {
+							SBI_PMU_HW_REF_CPU_CYCLES,
+							SBI_PMU_EVENT_TYPE_HW, 0}},
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+[PERF_COUNT_HW_CACHE_OP_MAX]
+[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event =	{C(RESULT_ACCESS),
+					C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), C(OP_READ),
+					C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS),
+					C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+			[C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS),
+					C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}},
+		},
+	},
+};
+
+static int pmu_sbi_ctr_get_width(int idx)
+{
+	return pmu_ctr_list[idx].width;
+}
+
+static bool pmu_sbi_ctr_is_fw(int cidx)
+{
+	union sbi_pmu_ctr_info *info;
+
+	info = &pmu_ctr_list[cidx];
+	if (!info)
+		return false;
+
+	return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false;
+}
+
+/*
+ * Returns the counter width of a programmable counter and number of hardware
+ * counters. As we don't support heterogeneous CPUs yet, it is okay to just
+ * return the counter width of the first programmable counter.
+ */
+int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
+{
+	int i;
+	union sbi_pmu_ctr_info *info;
+	u32 hpm_width = 0, hpm_count = 0;
+
+	if (!cmask)
+		return -EINVAL;
+
+	for_each_set_bit(i, &cmask, RISCV_MAX_COUNTERS) {
+		info = &pmu_ctr_list[i];
+		if (!info)
+			continue;
+		if (!hpm_width && info->csr != CSR_CYCLE && info->csr != CSR_INSTRET)
+			hpm_width = info->width;
+		if (info->type == SBI_PMU_CTR_TYPE_HW)
+			hpm_count++;
+	}
+
+	*hw_ctr_width = hpm_width;
+	*num_hw_ctr = hpm_count;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);
+
+static uint8_t pmu_sbi_csr_index(struct perf_event *event)
+{
+	return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
+}
+
+static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
+{
+	unsigned long cflags = 0;
+	bool guest_events = false;
+
+	if (event->attr.config1 & RISCV_PMU_CONFIG1_GUEST_EVENTS)
+		guest_events = true;
+	if (event->attr.exclude_kernel)
+		cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VSINH : SBI_PMU_CFG_FLAG_SET_SINH;
+	if (event->attr.exclude_user)
+		cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VUINH : SBI_PMU_CFG_FLAG_SET_UINH;
+	if (guest_events && event->attr.exclude_hv)
+		cflags |= SBI_PMU_CFG_FLAG_SET_SINH;
+	if (event->attr.exclude_host)
+		cflags |= SBI_PMU_CFG_FLAG_SET_UINH | SBI_PMU_CFG_FLAG_SET_SINH;
+	if (event->attr.exclude_guest)
+		cflags |= SBI_PMU_CFG_FLAG_SET_VSINH | SBI_PMU_CFG_FLAG_SET_VUINH;
+
+	return cflags;
+}
+
+static int pmu_sbi_ctr_get_idx(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+	struct sbiret ret;
+	int idx;
+	uint64_t cbase = 0, cmask = rvpmu->cmask;
+	unsigned long cflags = 0;
+
+	cflags = pmu_sbi_get_filter_flags(event);
+
+	/*
+	 * In legacy mode, we have to force the fixed counters for those events
+	 * but not in the user access mode as we want to use the other counters
+	 * that support sampling/filtering.
+	 */
+	if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
+		if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+			cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+			cmask = 1;
+		} else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
+			cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+			cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
+		}
+	}
+
+	/* retrieve the available counter index */
+#if defined(CONFIG_32BIT)
+	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
+			cmask, cflags, hwc->event_base, hwc->config,
+			hwc->config >> 32);
+#else
+	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
+			cmask, cflags, hwc->event_base, hwc->config, 0);
+#endif
+	if (ret.error) {
+		pr_debug("Not able to find a counter for event %lx config %llx\n",
+			hwc->event_base, hwc->config);
+		return sbi_err_map_linux_errno(ret.error);
+	}
+
+	idx = ret.value;
+	if (!test_bit(idx, &rvpmu->cmask) || !pmu_ctr_list[idx].value)
+		return -ENOENT;
+
+	/* Additional sanity check for the counter id */
+	if (pmu_sbi_ctr_is_fw(idx)) {
+		if (!test_and_set_bit(idx, cpuc->used_fw_ctrs))
+			return idx;
+	} else {
+		if (!test_and_set_bit(idx, cpuc->used_hw_ctrs))
+			return idx;
+	}
+
+	return -ENOENT;
+}
+
+static void pmu_sbi_ctr_clear_idx(struct perf_event *event)
+{
+
+	struct hw_perf_event *hwc = &event->hw;
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+	int idx = hwc->idx;
+
+	if (pmu_sbi_ctr_is_fw(idx))
+		clear_bit(idx, cpuc->used_fw_ctrs);
+	else
+		clear_bit(idx, cpuc->used_hw_ctrs);
+}
+
+static int pmu_event_find_cache(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result, ret;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx;
+
+	return ret;
+}
+
+static bool pmu_sbi_is_fw_event(struct perf_event *event)
+{
+	u32 type = event->attr.type;
+	u64 config = event->attr.config;
+
+	if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1))
+		return true;
+	else
+		return false;
+}
+
+static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
+{
+	u32 type = event->attr.type;
+	u64 config = event->attr.config;
+	int bSoftware;
+	u64 raw_config_val;
+	int ret;
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		if (config >= PERF_COUNT_HW_MAX)
+			return -EINVAL;
+		ret = pmu_hw_event_map[event->attr.config].event_idx;
+		break;
+	case PERF_TYPE_HW_CACHE:
+		ret = pmu_event_find_cache(config);
+		break;
+	case PERF_TYPE_RAW:
+		/*
+		 * As per SBI specification, the upper 16 bits must be unused for
+		 * a raw event. Use the MSB (63b) to distinguish between hardware
+		 * raw event and firmware events.
+		 */
+		bSoftware = config >> 63;
+		raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK;
+		if (bSoftware) {
+			ret = (raw_config_val & 0xFFFF) |
+				(SBI_PMU_EVENT_TYPE_FW << 16);
+		} else {
+			ret = RISCV_PMU_RAW_EVENT_IDX;
+			*econfig = raw_config_val;
+		}
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static u64 pmu_sbi_ctr_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	struct sbiret ret;
+	union sbi_pmu_ctr_info info;
+	u64 val = 0;
+
+	if (pmu_sbi_is_fw_event(event)) {
+		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
+				hwc->idx, 0, 0, 0, 0, 0);
+		if (!ret.error)
+			val = ret.value;
+	} else {
+		info = pmu_ctr_list[idx];
+		val = riscv_pmu_ctr_read_csr(info.csr);
+		if (IS_ENABLED(CONFIG_32BIT))
+			val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
+	}
+
+	return val;
+}
+
+static void pmu_sbi_set_scounteren(void *arg)
+{
+	struct perf_event *event = (struct perf_event *)arg;
+
+	if (event->hw.idx != -1)
+		csr_write(CSR_SCOUNTEREN,
+			  csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
+}
+
+static void pmu_sbi_reset_scounteren(void *arg)
+{
+	struct perf_event *event = (struct perf_event *)arg;
+
+	if (event->hw.idx != -1)
+		csr_write(CSR_SCOUNTEREN,
+			  csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
+}
+
+static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
+{
+	struct sbiret ret;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+
+#if defined(CONFIG_32BIT)
+	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
+			1, flag, ival, ival >> 32, 0);
+#else
+	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
+			1, flag, ival, 0, 0);
+#endif
+	if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
+		pr_err("Starting counter idx %d failed with error %d\n",
+			hwc->idx, sbi_err_map_linux_errno(ret.error));
+
+	if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+	    (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+		pmu_sbi_set_scounteren((void *)event);
+}
+
+static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
+{
+	struct sbiret ret;
+	struct hw_perf_event *hwc = &event->hw;
+
+	if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+	    (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+		pmu_sbi_reset_scounteren((void *)event);
+
+	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
+	if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
+		flag != SBI_PMU_STOP_FLAG_RESET)
+		pr_err("Stopping counter idx %d failed with error %d\n",
+			hwc->idx, sbi_err_map_linux_errno(ret.error));
+}
+
+static int pmu_sbi_find_num_ctrs(void)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
+	if (!ret.error)
+		return ret.value;
+	else
+		return sbi_err_map_linux_errno(ret.error);
+}
+
+static int pmu_sbi_get_ctrinfo(int nctr, unsigned long *mask)
+{
+	struct sbiret ret;
+	int i, num_hw_ctr = 0, num_fw_ctr = 0;
+	union sbi_pmu_ctr_info cinfo;
+
+	pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL);
+	if (!pmu_ctr_list)
+		return -ENOMEM;
+
+	for (i = 0; i < nctr; i++) {
+		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
+		if (ret.error)
+			/* The logical counter ids are not expected to be contiguous */
+			continue;
+
+		*mask |= BIT(i);
+
+		cinfo.value = ret.value;
+		if (cinfo.type == SBI_PMU_CTR_TYPE_FW)
+			num_fw_ctr++;
+		else
+			num_hw_ctr++;
+		pmu_ctr_list[i].value = cinfo.value;
+	}
+
+	pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr);
+
+	return 0;
+}
+
+static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
+{
+	/*
+	 * No need to check the error because we are disabling all the counters
+	 * which may include counters that are not enabled yet.
+	 */
+	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+		  0, pmu->cmask, 0, 0, 0, 0);
+}
+
+static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
+{
+	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+
+	/* No need to check the error here as we can't do anything about the error */
+	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
+		  cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
+}
+
+/*
+ * This function starts all the used counters in two step approach.
+ * Any counter that did not overflow can be start in a single step
+ * while the overflowed counters need to be started with updated initialization
+ * value.
+ */
+static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
+					       unsigned long ctr_ovf_mask)
+{
+	int idx = 0;
+	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+	struct perf_event *event;
+	unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+	unsigned long ctr_start_mask = 0;
+	uint64_t max_period;
+	struct hw_perf_event *hwc;
+	u64 init_val = 0;
+
+	ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
+
+	/* Start all the counters that did not overflow in a single shot */
+	sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
+		  0, 0, 0, 0);
+
+	/* Reinitialize and start all the counter that overflowed */
+	while (ctr_ovf_mask) {
+		if (ctr_ovf_mask & 0x01) {
+			event = cpu_hw_evt->events[idx];
+			hwc = &event->hw;
+			max_period = riscv_pmu_ctr_get_width_mask(event);
+			init_val = local64_read(&hwc->prev_count) & max_period;
+#if defined(CONFIG_32BIT)
+			sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
+				  flag, init_val, init_val >> 32, 0);
+#else
+			sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
+				  flag, init_val, 0, 0);
+#endif
+			perf_event_update_userpage(event);
+		}
+		ctr_ovf_mask = ctr_ovf_mask >> 1;
+		idx++;
+	}
+}
+
+static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
+{
+	struct perf_sample_data data;
+	struct pt_regs *regs;
+	struct hw_perf_event *hw_evt;
+	union sbi_pmu_ctr_info *info;
+	int lidx, hidx, fidx;
+	struct riscv_pmu *pmu;
+	struct perf_event *event;
+	unsigned long overflow;
+	unsigned long overflowed_ctrs = 0;
+	struct cpu_hw_events *cpu_hw_evt = dev;
+	u64 start_clock = sched_clock();
+
+	if (WARN_ON_ONCE(!cpu_hw_evt))
+		return IRQ_NONE;
+
+	/* Firmware counter don't support overflow yet */
+	fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
+	if (fidx == RISCV_MAX_COUNTERS) {
+		csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+		return IRQ_NONE;
+	}
+
+	event = cpu_hw_evt->events[fidx];
+	if (!event) {
+		csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+		return IRQ_NONE;
+	}
+
+	pmu = to_riscv_pmu(event->pmu);
+	pmu_sbi_stop_hw_ctrs(pmu);
+
+	/* Overflow status register should only be read after counter are stopped */
+	ALT_SBI_PMU_OVERFLOW(overflow);
+
+	/*
+	 * Overflow interrupt pending bit should only be cleared after stopping
+	 * all the counters to avoid any race condition.
+	 */
+	csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+
+	/* No overflow bit is set */
+	if (!overflow)
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
+		struct perf_event *event = cpu_hw_evt->events[lidx];
+
+		/* Skip if invalid event or user did not request a sampling */
+		if (!event || !is_sampling_event(event))
+			continue;
+
+		info = &pmu_ctr_list[lidx];
+		/* Do a sanity check */
+		if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
+			continue;
+
+		/* compute hardware counter index */
+		hidx = info->csr - CSR_CYCLE;
+		/* check if the corresponding bit is set in sscountovf */
+		if (!(overflow & (1 << hidx)))
+			continue;
+
+		/*
+		 * Keep a track of overflowed counters so that they can be started
+		 * with updated initial value.
+		 */
+		overflowed_ctrs |= 1 << lidx;
+		hw_evt = &event->hw;
+		riscv_pmu_event_update(event);
+		perf_sample_data_init(&data, 0, hw_evt->last_period);
+		if (riscv_pmu_event_set_period(event)) {
+			/*
+			 * Unlike other ISAs, RISC-V don't have to disable interrupts
+			 * to avoid throttling here. As per the specification, the
+			 * interrupt remains disabled until the OF bit is set.
+			 * Interrupts are enabled again only during the start.
+			 * TODO: We will need to stop the guest counters once
+			 * virtualization support is added.
+			 */
+			perf_event_overflow(event, &data, regs);
+		}
+	}
+
+	pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
+	perf_sample_event_took(sched_clock() - start_clock);
+
+	return IRQ_HANDLED;
+}
+
+static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
+	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+
+	/*
+	 * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
+	 * legacy option but that will be removed in the future.
+	 */
+	if (sysctl_perf_user_access == SYSCTL_LEGACY)
+		csr_write(CSR_SCOUNTEREN, 0x7);
+	else
+		csr_write(CSR_SCOUNTEREN, 0x2);
+
+	/* Stop all the counters so that they can be enabled from perf */
+	pmu_sbi_stop_all(pmu);
+
+	if (riscv_pmu_use_irq) {
+		cpu_hw_evt->irq = riscv_pmu_irq;
+		csr_clear(CSR_IP, BIT(riscv_pmu_irq_num));
+		csr_set(CSR_IE, BIT(riscv_pmu_irq_num));
+		enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
+	}
+
+	return 0;
+}
+
+static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	if (riscv_pmu_use_irq) {
+		disable_percpu_irq(riscv_pmu_irq);
+		csr_clear(CSR_IE, BIT(riscv_pmu_irq_num));
+	}
+
+	/* Disable all counters access for user mode now */
+	csr_write(CSR_SCOUNTEREN, 0x0);
+
+	return 0;
+}
+
+static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
+{
+	int ret;
+	struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
+	struct irq_domain *domain = NULL;
+
+	if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+		riscv_pmu_irq_num = RV_IRQ_PMU;
+		riscv_pmu_use_irq = true;
+	} else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) &&
+		   riscv_cached_mvendorid(0) == THEAD_VENDOR_ID &&
+		   riscv_cached_marchid(0) == 0 &&
+		   riscv_cached_mimpid(0) == 0) {
+		riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU;
+		riscv_pmu_use_irq = true;
+	}
+
+	if (!riscv_pmu_use_irq)
+		return -EOPNOTSUPP;
+
+	domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
+					  DOMAIN_BUS_ANY);
+	if (!domain) {
+		pr_err("Failed to find INTC IRQ root domain\n");
+		return -ENODEV;
+	}
+
+	riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num);
+	if (!riscv_pmu_irq) {
+		pr_err("Failed to map PMU interrupt for node\n");
+		return -ENODEV;
+	}
+
+	ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
+	if (ret) {
+		pr_err("registering percpu irq failed [%d]\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_CPU_PM
+static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
+				void *v)
+{
+	struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+	int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS);
+	struct perf_event *event;
+	int idx;
+
+	if (!enabled)
+		return NOTIFY_OK;
+
+	for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) {
+		event = cpuc->events[idx];
+		if (!event)
+			continue;
+
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			/*
+			 * Stop and update the counter
+			 */
+			riscv_pmu_stop(event, PERF_EF_UPDATE);
+			break;
+		case CPU_PM_EXIT:
+		case CPU_PM_ENTER_FAILED:
+			/*
+			 * Restore and enable the counter.
+			 */
+			riscv_pmu_start(event, PERF_EF_RELOAD);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static int riscv_pm_pmu_register(struct riscv_pmu *pmu)
+{
+	pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify;
+	return cpu_pm_register_notifier(&pmu->riscv_pm_nb);
+}
+
+static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
+{
+	cpu_pm_unregister_notifier(&pmu->riscv_pm_nb);
+}
+#else
+static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; }
+static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
+#endif
+
+static void riscv_pmu_destroy(struct riscv_pmu *pmu)
+{
+	riscv_pm_pmu_unregister(pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+}
+
+static void pmu_sbi_event_init(struct perf_event *event)
+{
+	/*
+	 * The permissions are set at event_init so that we do not depend
+	 * on the sysctl value that can change.
+	 */
+	if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
+		event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
+	else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
+		event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
+	else
+		event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
+}
+
+static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+	if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+		return;
+
+	if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+		    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+			return;
+		}
+	}
+
+	/*
+	 * The user mmapped the event to directly access it: this is where
+	 * we determine based on sysctl_perf_user_access if we grant userspace
+	 * the direct access to this event. That means that within the same
+	 * task, some events may be directly accessible and some other may not,
+	 * if the user changes the value of sysctl_perf_user_accesss in the
+	 * meantime.
+	 */
+
+	event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+
+	/*
+	 * We must enable userspace access *before* advertising in the user page
+	 * that it is possible to do so to avoid any race.
+	 * And we must notify all cpus here because threads that currently run
+	 * on other cpus will try to directly access the counter too without
+	 * calling pmu_sbi_ctr_start.
+	 */
+	if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+		on_each_cpu_mask(mm_cpumask(mm),
+				 pmu_sbi_set_scounteren, (void *)event, 1);
+}
+
+static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+	if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+		return;
+
+	if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+		    event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+			return;
+		}
+	}
+
+	/*
+	 * Here we can directly remove user access since the user does not have
+	 * access to the user page anymore so we avoid the racy window where the
+	 * user could have read cap_user_rdpmc to true right before we disable
+	 * it.
+	 */
+	event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+
+	if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+		on_each_cpu_mask(mm_cpumask(mm),
+				 pmu_sbi_reset_scounteren, (void *)event, 1);
+}
+
+static void riscv_pmu_update_counter_access(void *info)
+{
+	if (sysctl_perf_user_access == SYSCTL_LEGACY)
+		csr_write(CSR_SCOUNTEREN, 0x7);
+	else
+		csr_write(CSR_SCOUNTEREN, 0x2);
+}
+
+static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
+					      int write, void *buffer,
+					      size_t *lenp, loff_t *ppos)
+{
+	int prev = sysctl_perf_user_access;
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+	/*
+	 * Test against the previous value since we clear SCOUNTEREN when
+	 * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
+	 * not do that if that was already the case.
+	 */
+	if (ret || !write || prev == sysctl_perf_user_access)
+		return ret;
+
+	on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);
+
+	return 0;
+}
+
+static struct ctl_table sbi_pmu_sysctl_table[] = {
+	{
+		.procname       = "perf_user_access",
+		.data		= &sysctl_perf_user_access,
+		.maxlen		= sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler	= riscv_pmu_proc_user_access_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_TWO,
+	},
+	{ }
+};
+
+static int pmu_sbi_device_probe(struct platform_device *pdev)
+{
+	struct riscv_pmu *pmu = NULL;
+	int ret = -ENODEV;
+	int num_counters;
+
+	pr_info("SBI PMU extension is available\n");
+	pmu = riscv_pmu_alloc();
+	if (!pmu)
+		return -ENOMEM;
+
+	num_counters = pmu_sbi_find_num_ctrs();
+	if (num_counters < 0) {
+		pr_err("SBI PMU extension doesn't provide any counters\n");
+		goto out_free;
+	}
+
+	/* It is possible to get from SBI more than max number of counters */
+	if (num_counters > RISCV_MAX_COUNTERS) {
+		num_counters = RISCV_MAX_COUNTERS;
+		pr_info("SBI returned more than maximum number of counters. Limiting the number of counters to %d\n", num_counters);
+	}
+
+	/* cache all the information about counters now */
+	if (pmu_sbi_get_ctrinfo(num_counters, &cmask))
+		goto out_free;
+
+	ret = pmu_sbi_setup_irqs(pmu, pdev);
+	if (ret < 0) {
+		pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
+		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
+	}
+
+	pmu->pmu.attr_groups = riscv_pmu_attr_groups;
+	pmu->cmask = cmask;
+	pmu->ctr_start = pmu_sbi_ctr_start;
+	pmu->ctr_stop = pmu_sbi_ctr_stop;
+	pmu->event_map = pmu_sbi_event_map;
+	pmu->ctr_get_idx = pmu_sbi_ctr_get_idx;
+	pmu->ctr_get_width = pmu_sbi_ctr_get_width;
+	pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
+	pmu->ctr_read = pmu_sbi_ctr_read;
+	pmu->event_init = pmu_sbi_event_init;
+	pmu->event_mapped = pmu_sbi_event_mapped;
+	pmu->event_unmapped = pmu_sbi_event_unmapped;
+	pmu->csr_index = pmu_sbi_csr_index;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+	if (ret)
+		return ret;
+
+	ret = riscv_pm_pmu_register(pmu);
+	if (ret)
+		goto out_unregister;
+
+	ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
+	if (ret)
+		goto out_unregister;
+
+	register_sysctl("kernel", sbi_pmu_sysctl_table);
+
+	return 0;
+
+out_unregister:
+	riscv_pmu_destroy(pmu);
+
+out_free:
+	kfree(pmu);
+	return ret;
+}
+
+static struct platform_driver pmu_sbi_driver = {
+	.probe		= pmu_sbi_device_probe,
+	.driver		= {
+		.name	= RISCV_PMU_SBI_PDEV_NAME,
+	},
+};
+
+static int __init pmu_sbi_devinit(void)
+{
+	int ret;
+	struct platform_device *pdev;
+
+	if (sbi_spec_version < sbi_mk_version(0, 3) ||
+	    !sbi_probe_extension(SBI_EXT_PMU)) {
+		return 0;
+	}
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
+				      "perf/riscv/pmu:starting",
+				      pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
+	if (ret) {
+		pr_err("CPU hotplug notifier could not be registered: %d\n",
+		       ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&pmu_sbi_driver);
+	if (ret)
+		return ret;
+
+	pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0);
+	if (IS_ERR(pdev)) {
+		platform_driver_unregister(&pmu_sbi_driver);
+		return PTR_ERR(pdev);
+	}
+
+	/* Notify legacy implementation that SBI pmu is available*/
+	riscv_pmu_legacy_skip_init();
+
+	return ret;
+}
+device_initcall(pmu_sbi_devinit)
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
new file mode 100644
index 0000000000..1edb9c0370
--- /dev/null
+++ b/drivers/perf/thunderx2_pmu.c
@@ -0,0 +1,1054 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CAVIUM THUNDERX2 SoC PMU UNCORE
+ * Copyright (C) 2018 Cavium Inc.
+ * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/cpuhotplug.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/* Each ThunderX2(TX2) Socket has a L3C and DMC UNCORE PMU device.
+ * Each UNCORE PMU device consists of 4 independent programmable counters.
+ * Counters are 32 bit and do not support overflow interrupt,
+ * they need to be sampled before overflow(i.e, at every 2 seconds).
+ */
+
+#define TX2_PMU_DMC_L3C_MAX_COUNTERS	4
+#define TX2_PMU_CCPI2_MAX_COUNTERS	8
+#define TX2_PMU_MAX_COUNTERS		TX2_PMU_CCPI2_MAX_COUNTERS
+
+
+#define TX2_PMU_DMC_CHANNELS		8
+#define TX2_PMU_L3_TILES		16
+
+#define TX2_PMU_HRTIMER_INTERVAL	(2 * NSEC_PER_SEC)
+#define GET_EVENTID(ev, mask)		((ev->hw.config) & mask)
+#define GET_COUNTERID(ev, mask)		((ev->hw.idx) & mask)
+ /* 1 byte per counter(4 counters).
+  * Event id is encoded in bits [5:1] of a byte,
+  */
+#define DMC_EVENT_CFG(idx, val)		((val) << (((idx) * 8) + 1))
+
+/* bits[3:0] to select counters, are indexed from 8 to 15. */
+#define CCPI2_COUNTER_OFFSET		8
+
+#define L3C_COUNTER_CTL			0xA8
+#define L3C_COUNTER_DATA		0xAC
+#define DMC_COUNTER_CTL			0x234
+#define DMC_COUNTER_DATA		0x240
+
+#define CCPI2_PERF_CTL			0x108
+#define CCPI2_COUNTER_CTL		0x10C
+#define CCPI2_COUNTER_SEL		0x12c
+#define CCPI2_COUNTER_DATA_L		0x130
+#define CCPI2_COUNTER_DATA_H		0x134
+
+/* L3C event IDs */
+#define L3_EVENT_READ_REQ		0xD
+#define L3_EVENT_WRITEBACK_REQ		0xE
+#define L3_EVENT_INV_N_WRITE_REQ	0xF
+#define L3_EVENT_INV_REQ		0x10
+#define L3_EVENT_EVICT_REQ		0x13
+#define L3_EVENT_INV_N_WRITE_HIT	0x14
+#define L3_EVENT_INV_HIT		0x15
+#define L3_EVENT_READ_HIT		0x17
+#define L3_EVENT_MAX			0x18
+
+/* DMC event IDs */
+#define DMC_EVENT_COUNT_CYCLES		0x1
+#define DMC_EVENT_WRITE_TXNS		0xB
+#define DMC_EVENT_DATA_TRANSFERS	0xD
+#define DMC_EVENT_READ_TXNS		0xF
+#define DMC_EVENT_MAX			0x10
+
+#define CCPI2_EVENT_REQ_PKT_SENT	0x3D
+#define CCPI2_EVENT_SNOOP_PKT_SENT	0x65
+#define CCPI2_EVENT_DATA_PKT_SENT	0x105
+#define CCPI2_EVENT_GIC_PKT_SENT	0x12D
+#define CCPI2_EVENT_MAX			0x200
+
+#define CCPI2_PERF_CTL_ENABLE		BIT(0)
+#define CCPI2_PERF_CTL_START		BIT(1)
+#define CCPI2_PERF_CTL_RESET		BIT(4)
+#define CCPI2_EVENT_LEVEL_RISING_EDGE	BIT(10)
+#define CCPI2_EVENT_TYPE_EDGE_SENSITIVE	BIT(11)
+
+enum tx2_uncore_type {
+	PMU_TYPE_L3C,
+	PMU_TYPE_DMC,
+	PMU_TYPE_CCPI2,
+	PMU_TYPE_INVALID,
+};
+
+/*
+ * Each socket has 3 uncore devices associated with a PMU. The DMC and
+ * L3C have 4 32-bit counters and the CCPI2 has 8 64-bit counters.
+ */
+struct tx2_uncore_pmu {
+	struct hlist_node hpnode;
+	struct list_head  entry;
+	struct pmu pmu;
+	char *name;
+	int node;
+	int cpu;
+	u32 max_counters;
+	u32 counters_mask;
+	u32 prorate_factor;
+	u32 max_events;
+	u32 events_mask;
+	u64 hrtimer_interval;
+	void __iomem *base;
+	DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS);
+	struct perf_event *events[TX2_PMU_MAX_COUNTERS];
+	struct device *dev;
+	struct hrtimer hrtimer;
+	const struct attribute_group **attr_groups;
+	enum tx2_uncore_type type;
+	enum hrtimer_restart (*hrtimer_callback)(struct hrtimer *cb);
+	void (*init_cntr_base)(struct perf_event *event,
+			struct tx2_uncore_pmu *tx2_pmu);
+	void (*stop_event)(struct perf_event *event);
+	void (*start_event)(struct perf_event *event, int flags);
+};
+
+static LIST_HEAD(tx2_pmus);
+
+static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu)
+{
+	return container_of(pmu, struct tx2_uncore_pmu, pmu);
+}
+
+#define TX2_PMU_FORMAT_ATTR(_var, _name, _format)			\
+static ssize_t								\
+__tx2_pmu_##_var##_show(struct device *dev,				\
+			       struct device_attribute *attr,		\
+			       char *page)				\
+{									\
+	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
+	return sysfs_emit(page, _format "\n");				\
+}									\
+									\
+static struct device_attribute format_attr_##_var =			\
+	__ATTR(_name, 0444, __tx2_pmu_##_var##_show, NULL)
+
+TX2_PMU_FORMAT_ATTR(event, event, "config:0-4");
+TX2_PMU_FORMAT_ATTR(event_ccpi2, event, "config:0-9");
+
+static struct attribute *l3c_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute *dmc_pmu_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute *ccpi2_pmu_format_attrs[] = {
+	&format_attr_event_ccpi2.attr,
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = l3c_pmu_format_attrs,
+};
+
+static const struct attribute_group dmc_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = dmc_pmu_format_attrs,
+};
+
+static const struct attribute_group ccpi2_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = ccpi2_pmu_format_attrs,
+};
+
+/*
+ * sysfs event attributes
+ */
+static ssize_t tx2_pmu_event_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "event=0x%lx\n", (unsigned long) eattr->var);
+}
+
+#define TX2_EVENT_ATTR(name, config) \
+	PMU_EVENT_ATTR(name, tx2_pmu_event_attr_##name, \
+			config, tx2_pmu_event_show)
+
+TX2_EVENT_ATTR(read_request, L3_EVENT_READ_REQ);
+TX2_EVENT_ATTR(writeback_request, L3_EVENT_WRITEBACK_REQ);
+TX2_EVENT_ATTR(inv_nwrite_request, L3_EVENT_INV_N_WRITE_REQ);
+TX2_EVENT_ATTR(inv_request, L3_EVENT_INV_REQ);
+TX2_EVENT_ATTR(evict_request, L3_EVENT_EVICT_REQ);
+TX2_EVENT_ATTR(inv_nwrite_hit, L3_EVENT_INV_N_WRITE_HIT);
+TX2_EVENT_ATTR(inv_hit, L3_EVENT_INV_HIT);
+TX2_EVENT_ATTR(read_hit, L3_EVENT_READ_HIT);
+
+static struct attribute *l3c_pmu_events_attrs[] = {
+	&tx2_pmu_event_attr_read_request.attr.attr,
+	&tx2_pmu_event_attr_writeback_request.attr.attr,
+	&tx2_pmu_event_attr_inv_nwrite_request.attr.attr,
+	&tx2_pmu_event_attr_inv_request.attr.attr,
+	&tx2_pmu_event_attr_evict_request.attr.attr,
+	&tx2_pmu_event_attr_inv_nwrite_hit.attr.attr,
+	&tx2_pmu_event_attr_inv_hit.attr.attr,
+	&tx2_pmu_event_attr_read_hit.attr.attr,
+	NULL,
+};
+
+TX2_EVENT_ATTR(cnt_cycles, DMC_EVENT_COUNT_CYCLES);
+TX2_EVENT_ATTR(write_txns, DMC_EVENT_WRITE_TXNS);
+TX2_EVENT_ATTR(data_transfers, DMC_EVENT_DATA_TRANSFERS);
+TX2_EVENT_ATTR(read_txns, DMC_EVENT_READ_TXNS);
+
+static struct attribute *dmc_pmu_events_attrs[] = {
+	&tx2_pmu_event_attr_cnt_cycles.attr.attr,
+	&tx2_pmu_event_attr_write_txns.attr.attr,
+	&tx2_pmu_event_attr_data_transfers.attr.attr,
+	&tx2_pmu_event_attr_read_txns.attr.attr,
+	NULL,
+};
+
+TX2_EVENT_ATTR(req_pktsent, CCPI2_EVENT_REQ_PKT_SENT);
+TX2_EVENT_ATTR(snoop_pktsent, CCPI2_EVENT_SNOOP_PKT_SENT);
+TX2_EVENT_ATTR(data_pktsent, CCPI2_EVENT_DATA_PKT_SENT);
+TX2_EVENT_ATTR(gic_pktsent, CCPI2_EVENT_GIC_PKT_SENT);
+
+static struct attribute *ccpi2_pmu_events_attrs[] = {
+	&tx2_pmu_event_attr_req_pktsent.attr.attr,
+	&tx2_pmu_event_attr_snoop_pktsent.attr.attr,
+	&tx2_pmu_event_attr_data_pktsent.attr.attr,
+	&tx2_pmu_event_attr_gic_pktsent.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = l3c_pmu_events_attrs,
+};
+
+static const struct attribute_group dmc_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = dmc_pmu_events_attrs,
+};
+
+static const struct attribute_group ccpi2_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = ccpi2_pmu_events_attrs,
+};
+
+/*
+ * sysfs cpumask attributes
+ */
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct tx2_uncore_pmu *tx2_pmu;
+
+	tx2_pmu = pmu_to_tx2_pmu(dev_get_drvdata(dev));
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(tx2_pmu->cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *tx2_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group pmu_cpumask_attr_group = {
+	.attrs = tx2_pmu_cpumask_attrs,
+};
+
+/*
+ * Per PMU device attribute groups
+ */
+static const struct attribute_group *l3c_pmu_attr_groups[] = {
+	&l3c_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&l3c_pmu_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *dmc_pmu_attr_groups[] = {
+	&dmc_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&dmc_pmu_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *ccpi2_pmu_attr_groups[] = {
+	&ccpi2_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&ccpi2_pmu_events_attr_group,
+	NULL
+};
+
+static inline u32 reg_readl(unsigned long addr)
+{
+	return readl((void __iomem *)addr);
+}
+
+static inline void reg_writel(u32 val, unsigned long addr)
+{
+	writel(val, (void __iomem *)addr);
+}
+
+static int alloc_counter(struct tx2_uncore_pmu *tx2_pmu)
+{
+	int counter;
+
+	counter = find_first_zero_bit(tx2_pmu->active_counters,
+				tx2_pmu->max_counters);
+	if (counter == tx2_pmu->max_counters)
+		return -ENOSPC;
+
+	set_bit(counter, tx2_pmu->active_counters);
+	return counter;
+}
+
+static inline void free_counter(struct tx2_uncore_pmu *tx2_pmu, int counter)
+{
+	clear_bit(counter, tx2_pmu->active_counters);
+}
+
+static void init_cntr_base_l3c(struct perf_event *event,
+		struct tx2_uncore_pmu *tx2_pmu)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u32 cmask;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	cmask = tx2_pmu->counters_mask;
+
+	/* counter ctrl/data reg offset at 8 */
+	hwc->config_base = (unsigned long)tx2_pmu->base
+		+ L3C_COUNTER_CTL + (8 * GET_COUNTERID(event, cmask));
+	hwc->event_base =  (unsigned long)tx2_pmu->base
+		+ L3C_COUNTER_DATA + (8 * GET_COUNTERID(event, cmask));
+}
+
+static void init_cntr_base_dmc(struct perf_event *event,
+		struct tx2_uncore_pmu *tx2_pmu)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u32 cmask;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	cmask = tx2_pmu->counters_mask;
+
+	hwc->config_base = (unsigned long)tx2_pmu->base
+		+ DMC_COUNTER_CTL;
+	/* counter data reg offset at 0xc */
+	hwc->event_base = (unsigned long)tx2_pmu->base
+		+ DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event, cmask));
+}
+
+static void init_cntr_base_ccpi2(struct perf_event *event,
+		struct tx2_uncore_pmu *tx2_pmu)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u32 cmask;
+
+	cmask = tx2_pmu->counters_mask;
+
+	hwc->config_base = (unsigned long)tx2_pmu->base
+		+ CCPI2_COUNTER_CTL + (4 * GET_COUNTERID(event, cmask));
+	hwc->event_base =  (unsigned long)tx2_pmu->base;
+}
+
+static void uncore_start_event_l3c(struct perf_event *event, int flags)
+{
+	u32 val, emask;
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	emask = tx2_pmu->events_mask;
+
+	/* event id encoded in bits [07:03] */
+	val = GET_EVENTID(event, emask) << 3;
+	reg_writel(val, hwc->config_base);
+	local64_set(&hwc->prev_count, 0);
+	reg_writel(0, hwc->event_base);
+}
+
+static inline void uncore_stop_event_l3c(struct perf_event *event)
+{
+	reg_writel(0, event->hw.config_base);
+}
+
+static void uncore_start_event_dmc(struct perf_event *event, int flags)
+{
+	u32 val, cmask, emask;
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+	int idx, event_id;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	cmask = tx2_pmu->counters_mask;
+	emask = tx2_pmu->events_mask;
+
+	idx = GET_COUNTERID(event, cmask);
+	event_id = GET_EVENTID(event, emask);
+
+	/* enable and start counters.
+	 * 8 bits for each counter, bits[05:01] of a counter to set event type.
+	 */
+	val = reg_readl(hwc->config_base);
+	val &= ~DMC_EVENT_CFG(idx, 0x1f);
+	val |= DMC_EVENT_CFG(idx, event_id);
+	reg_writel(val, hwc->config_base);
+	local64_set(&hwc->prev_count, 0);
+	reg_writel(0, hwc->event_base);
+}
+
+static void uncore_stop_event_dmc(struct perf_event *event)
+{
+	u32 val, cmask;
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+	int idx;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	cmask = tx2_pmu->counters_mask;
+	idx = GET_COUNTERID(event, cmask);
+
+	/* clear event type(bits[05:01]) to stop counter */
+	val = reg_readl(hwc->config_base);
+	val &= ~DMC_EVENT_CFG(idx, 0x1f);
+	reg_writel(val, hwc->config_base);
+}
+
+static void uncore_start_event_ccpi2(struct perf_event *event, int flags)
+{
+	u32 emask;
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	emask = tx2_pmu->events_mask;
+
+	/* Bit [09:00] to set event id.
+	 * Bits [10], set level to rising edge.
+	 * Bits [11], set type to edge sensitive.
+	 */
+	reg_writel((CCPI2_EVENT_TYPE_EDGE_SENSITIVE |
+			CCPI2_EVENT_LEVEL_RISING_EDGE |
+			GET_EVENTID(event, emask)), hwc->config_base);
+
+	/* reset[4], enable[0] and start[1] counters */
+	reg_writel(CCPI2_PERF_CTL_RESET |
+			CCPI2_PERF_CTL_START |
+			CCPI2_PERF_CTL_ENABLE,
+			hwc->event_base + CCPI2_PERF_CTL);
+	local64_set(&event->hw.prev_count, 0ULL);
+}
+
+static void uncore_stop_event_ccpi2(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* disable and stop counter */
+	reg_writel(0, hwc->event_base + CCPI2_PERF_CTL);
+}
+
+static void tx2_uncore_event_update(struct perf_event *event)
+{
+	u64 prev, delta, new = 0;
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+	enum tx2_uncore_type type;
+	u32 prorate_factor;
+	u32 cmask, emask;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	type = tx2_pmu->type;
+	cmask = tx2_pmu->counters_mask;
+	emask = tx2_pmu->events_mask;
+	prorate_factor = tx2_pmu->prorate_factor;
+	if (type == PMU_TYPE_CCPI2) {
+		reg_writel(CCPI2_COUNTER_OFFSET +
+				GET_COUNTERID(event, cmask),
+				hwc->event_base + CCPI2_COUNTER_SEL);
+		new = reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_H);
+		new = (new << 32) +
+			reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_L);
+		prev = local64_xchg(&hwc->prev_count, new);
+		delta = new - prev;
+	} else {
+		new = reg_readl(hwc->event_base);
+		prev = local64_xchg(&hwc->prev_count, new);
+		/* handles rollover of 32 bit counter */
+		delta = (u32)(((1ULL << 32) - prev) + new);
+	}
+
+	/* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */
+	if (type == PMU_TYPE_DMC &&
+			GET_EVENTID(event, emask) == DMC_EVENT_DATA_TRANSFERS)
+		delta = delta/4;
+
+	/* L3C and DMC has 16 and 8 interleave channels respectively.
+	 * The sampled value is for channel 0 and multiplied with
+	 * prorate_factor to get the count for a device.
+	 */
+	local64_add(delta * prorate_factor, &event->count);
+}
+
+static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
+{
+	int i = 0;
+	struct acpi_tx2_pmu_device {
+		__u8 id[ACPI_ID_LEN];
+		enum tx2_uncore_type type;
+	} devices[] = {
+		{"CAV901D", PMU_TYPE_L3C},
+		{"CAV901F", PMU_TYPE_DMC},
+		{"CAV901E", PMU_TYPE_CCPI2},
+		{"", PMU_TYPE_INVALID}
+	};
+
+	while (devices[i].type != PMU_TYPE_INVALID) {
+		if (!strcmp(acpi_device_hid(adev), devices[i].id))
+			break;
+		i++;
+	}
+
+	return devices[i].type;
+}
+
+static bool tx2_uncore_validate_event(struct pmu *pmu,
+				  struct perf_event *event, int *counters)
+{
+	if (is_software_event(event))
+		return true;
+	/* Reject groups spanning multiple HW PMUs. */
+	if (event->pmu != pmu)
+		return false;
+
+	*counters = *counters + 1;
+	return true;
+}
+
+/*
+ * Make sure the group of events can be scheduled at once
+ * on the PMU.
+ */
+static bool tx2_uncore_validate_event_group(struct perf_event *event,
+		int max_counters)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	int counters = 0;
+
+	if (event->group_leader == event)
+		return true;
+
+	if (!tx2_uncore_validate_event(event->pmu, leader, &counters))
+		return false;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!tx2_uncore_validate_event(event->pmu, sibling, &counters))
+			return false;
+	}
+
+	if (!tx2_uncore_validate_event(event->pmu, event, &counters))
+		return false;
+
+	/*
+	 * If the group requires more counters than the HW has,
+	 * it cannot ever be scheduled.
+	 */
+	return counters <= max_counters;
+}
+
+
+static int tx2_uncore_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+
+	/* Test the event attr type check for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * SOC PMU counters are shared across all cores.
+	 * Therefore, it does not support per-process mode.
+	 * Also, it does not support event sampling mode.
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	if (tx2_pmu->cpu >= nr_cpu_ids)
+		return -EINVAL;
+	event->cpu = tx2_pmu->cpu;
+
+	if (event->attr.config >= tx2_pmu->max_events)
+		return -EINVAL;
+
+	/* store event id */
+	hwc->config = event->attr.config;
+
+	/* Validate the group */
+	if (!tx2_uncore_validate_event_group(event, tx2_pmu->max_counters))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void tx2_uncore_event_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+
+	hwc->state = 0;
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+
+	tx2_pmu->start_event(event, flags);
+	perf_event_update_userpage(event);
+
+	/* No hrtimer needed for CCPI2, 64-bit counters */
+	if (!tx2_pmu->hrtimer_callback)
+		return;
+
+	/* Start timer for first event */
+	if (bitmap_weight(tx2_pmu->active_counters,
+				tx2_pmu->max_counters) == 1) {
+		hrtimer_start(&tx2_pmu->hrtimer,
+			ns_to_ktime(tx2_pmu->hrtimer_interval),
+			HRTIMER_MODE_REL_PINNED);
+	}
+}
+
+static void tx2_uncore_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	tx2_pmu->stop_event(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+	if (flags & PERF_EF_UPDATE) {
+		tx2_uncore_event_update(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int tx2_uncore_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct tx2_uncore_pmu *tx2_pmu;
+
+	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+
+	/* Allocate a free counter */
+	hwc->idx  = alloc_counter(tx2_pmu);
+	if (hwc->idx < 0)
+		return -EAGAIN;
+
+	tx2_pmu->events[hwc->idx] = event;
+	/* set counter control and data registers base address */
+	tx2_pmu->init_cntr_base(event, tx2_pmu);
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (flags & PERF_EF_START)
+		tx2_uncore_event_start(event, flags);
+
+	return 0;
+}
+
+static void tx2_uncore_event_del(struct perf_event *event, int flags)
+{
+	struct tx2_uncore_pmu *tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 cmask;
+
+	cmask = tx2_pmu->counters_mask;
+	tx2_uncore_event_stop(event, PERF_EF_UPDATE);
+
+	/* clear the assigned counter */
+	free_counter(tx2_pmu, GET_COUNTERID(event, cmask));
+
+	perf_event_update_userpage(event);
+	tx2_pmu->events[hwc->idx] = NULL;
+	hwc->idx = -1;
+
+	if (!tx2_pmu->hrtimer_callback)
+		return;
+
+	if (bitmap_empty(tx2_pmu->active_counters, tx2_pmu->max_counters))
+		hrtimer_cancel(&tx2_pmu->hrtimer);
+}
+
+static void tx2_uncore_event_read(struct perf_event *event)
+{
+	tx2_uncore_event_update(event);
+}
+
+static enum hrtimer_restart tx2_hrtimer_callback(struct hrtimer *timer)
+{
+	struct tx2_uncore_pmu *tx2_pmu;
+	int max_counters, idx;
+
+	tx2_pmu = container_of(timer, struct tx2_uncore_pmu, hrtimer);
+	max_counters = tx2_pmu->max_counters;
+
+	if (bitmap_empty(tx2_pmu->active_counters, max_counters))
+		return HRTIMER_NORESTART;
+
+	for_each_set_bit(idx, tx2_pmu->active_counters, max_counters) {
+		struct perf_event *event = tx2_pmu->events[idx];
+
+		tx2_uncore_event_update(event);
+	}
+	hrtimer_forward_now(timer, ns_to_ktime(tx2_pmu->hrtimer_interval));
+	return HRTIMER_RESTART;
+}
+
+static int tx2_uncore_pmu_register(
+		struct tx2_uncore_pmu *tx2_pmu)
+{
+	struct device *dev = tx2_pmu->dev;
+	char *name = tx2_pmu->name;
+
+	/* Perf event registration */
+	tx2_pmu->pmu = (struct pmu) {
+		.module         = THIS_MODULE,
+		.attr_groups	= tx2_pmu->attr_groups,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= tx2_uncore_event_init,
+		.add		= tx2_uncore_event_add,
+		.del		= tx2_uncore_event_del,
+		.start		= tx2_uncore_event_start,
+		.stop		= tx2_uncore_event_stop,
+		.read		= tx2_uncore_event_read,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	tx2_pmu->pmu.name = devm_kasprintf(dev, GFP_KERNEL,
+			"%s", name);
+
+	return perf_pmu_register(&tx2_pmu->pmu, tx2_pmu->pmu.name, -1);
+}
+
+static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu)
+{
+	int ret, cpu;
+
+	cpu = cpumask_any_and(cpumask_of_node(tx2_pmu->node),
+			cpu_online_mask);
+
+	tx2_pmu->cpu = cpu;
+
+	if (tx2_pmu->hrtimer_callback) {
+		hrtimer_init(&tx2_pmu->hrtimer,
+				CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		tx2_pmu->hrtimer.function = tx2_pmu->hrtimer_callback;
+	}
+
+	ret = tx2_uncore_pmu_register(tx2_pmu);
+	if (ret) {
+		dev_err(tx2_pmu->dev, "%s PMU: Failed to init driver\n",
+				tx2_pmu->name);
+		return -ENODEV;
+	}
+
+	/* register hotplug callback for the pmu */
+	ret = cpuhp_state_add_instance(
+			CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
+			&tx2_pmu->hpnode);
+	if (ret) {
+		dev_err(tx2_pmu->dev, "Error %d registering hotplug", ret);
+		return ret;
+	}
+
+	/* Add to list */
+	list_add(&tx2_pmu->entry, &tx2_pmus);
+
+	dev_dbg(tx2_pmu->dev, "%s PMU UNCORE registered\n",
+			tx2_pmu->pmu.name);
+	return ret;
+}
+
+static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
+		acpi_handle handle, struct acpi_device *adev, u32 type)
+{
+	struct tx2_uncore_pmu *tx2_pmu;
+	void __iomem *base;
+	struct resource res;
+	struct resource_entry *rentry;
+	struct list_head list;
+	int ret;
+
+	INIT_LIST_HEAD(&list);
+	ret = acpi_dev_get_resources(adev, &list, NULL, NULL);
+	if (ret <= 0) {
+		dev_err(dev, "failed to parse _CRS method, error %d\n", ret);
+		return NULL;
+	}
+
+	list_for_each_entry(rentry, &list, node) {
+		if (resource_type(rentry->res) == IORESOURCE_MEM) {
+			res = *rentry->res;
+			rentry = NULL;
+			break;
+		}
+	}
+	acpi_dev_free_resource_list(&list);
+
+	if (rentry) {
+		dev_err(dev, "PMU type %d: Fail to find resource\n", type);
+		return NULL;
+	}
+
+	base = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(base))
+		return NULL;
+
+	tx2_pmu = devm_kzalloc(dev, sizeof(*tx2_pmu), GFP_KERNEL);
+	if (!tx2_pmu)
+		return NULL;
+
+	tx2_pmu->dev = dev;
+	tx2_pmu->type = type;
+	tx2_pmu->base = base;
+	tx2_pmu->node = dev_to_node(dev);
+	INIT_LIST_HEAD(&tx2_pmu->entry);
+
+	switch (tx2_pmu->type) {
+	case PMU_TYPE_L3C:
+		tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS;
+		tx2_pmu->counters_mask = 0x3;
+		tx2_pmu->prorate_factor = TX2_PMU_L3_TILES;
+		tx2_pmu->max_events = L3_EVENT_MAX;
+		tx2_pmu->events_mask = 0x1f;
+		tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL;
+		tx2_pmu->hrtimer_callback = tx2_hrtimer_callback;
+		tx2_pmu->attr_groups = l3c_pmu_attr_groups;
+		tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
+				"uncore_l3c_%d", tx2_pmu->node);
+		tx2_pmu->init_cntr_base = init_cntr_base_l3c;
+		tx2_pmu->start_event = uncore_start_event_l3c;
+		tx2_pmu->stop_event = uncore_stop_event_l3c;
+		break;
+	case PMU_TYPE_DMC:
+		tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS;
+		tx2_pmu->counters_mask = 0x3;
+		tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS;
+		tx2_pmu->max_events = DMC_EVENT_MAX;
+		tx2_pmu->events_mask = 0x1f;
+		tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL;
+		tx2_pmu->hrtimer_callback = tx2_hrtimer_callback;
+		tx2_pmu->attr_groups = dmc_pmu_attr_groups;
+		tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
+				"uncore_dmc_%d", tx2_pmu->node);
+		tx2_pmu->init_cntr_base = init_cntr_base_dmc;
+		tx2_pmu->start_event = uncore_start_event_dmc;
+		tx2_pmu->stop_event = uncore_stop_event_dmc;
+		break;
+	case PMU_TYPE_CCPI2:
+		/* CCPI2 has 8 counters */
+		tx2_pmu->max_counters = TX2_PMU_CCPI2_MAX_COUNTERS;
+		tx2_pmu->counters_mask = 0x7;
+		tx2_pmu->prorate_factor = 1;
+		tx2_pmu->max_events = CCPI2_EVENT_MAX;
+		tx2_pmu->events_mask = 0x1ff;
+		tx2_pmu->attr_groups = ccpi2_pmu_attr_groups;
+		tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
+				"uncore_ccpi2_%d", tx2_pmu->node);
+		tx2_pmu->init_cntr_base = init_cntr_base_ccpi2;
+		tx2_pmu->start_event = uncore_start_event_ccpi2;
+		tx2_pmu->stop_event = uncore_stop_event_ccpi2;
+		tx2_pmu->hrtimer_callback = NULL;
+		break;
+	case PMU_TYPE_INVALID:
+		devm_kfree(dev, tx2_pmu);
+		return NULL;
+	}
+
+	return tx2_pmu;
+}
+
+static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level,
+				    void *data, void **return_value)
+{
+	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
+	struct tx2_uncore_pmu *tx2_pmu;
+	enum tx2_uncore_type type;
+
+	if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
+		return AE_OK;
+
+	type = get_tx2_pmu_type(adev);
+	if (type == PMU_TYPE_INVALID)
+		return AE_OK;
+
+	tx2_pmu = tx2_uncore_pmu_init_dev((struct device *)data,
+			handle, adev, type);
+
+	if (!tx2_pmu)
+		return AE_ERROR;
+
+	if (tx2_uncore_pmu_add_dev(tx2_pmu)) {
+		/* Can't add the PMU device, abort */
+		return AE_ERROR;
+	}
+	return AE_OK;
+}
+
+static int tx2_uncore_pmu_online_cpu(unsigned int cpu,
+		struct hlist_node *hpnode)
+{
+	struct tx2_uncore_pmu *tx2_pmu;
+
+	tx2_pmu = hlist_entry_safe(hpnode,
+			struct tx2_uncore_pmu, hpnode);
+
+	/* Pick this CPU, If there is no CPU/PMU association and both are
+	 * from same node.
+	 */
+	if ((tx2_pmu->cpu >= nr_cpu_ids) &&
+		(tx2_pmu->node == cpu_to_node(cpu)))
+		tx2_pmu->cpu = cpu;
+
+	return 0;
+}
+
+static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
+		struct hlist_node *hpnode)
+{
+	int new_cpu;
+	struct tx2_uncore_pmu *tx2_pmu;
+	struct cpumask cpu_online_mask_temp;
+
+	tx2_pmu = hlist_entry_safe(hpnode,
+			struct tx2_uncore_pmu, hpnode);
+
+	if (cpu != tx2_pmu->cpu)
+		return 0;
+
+	if (tx2_pmu->hrtimer_callback)
+		hrtimer_cancel(&tx2_pmu->hrtimer);
+
+	cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
+	cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
+	new_cpu = cpumask_any_and(
+			cpumask_of_node(tx2_pmu->node),
+			&cpu_online_mask_temp);
+
+	tx2_pmu->cpu = new_cpu;
+	if (new_cpu >= nr_cpu_ids)
+		return 0;
+	perf_pmu_migrate_context(&tx2_pmu->pmu, cpu, new_cpu);
+
+	return 0;
+}
+
+static const struct acpi_device_id tx2_uncore_acpi_match[] = {
+	{"CAV901C", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, tx2_uncore_acpi_match);
+
+static int tx2_uncore_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	acpi_handle handle;
+	acpi_status status;
+
+	set_dev_node(dev, acpi_get_node(ACPI_HANDLE(dev)));
+
+	if (!has_acpi_companion(dev))
+		return -ENODEV;
+
+	handle = ACPI_HANDLE(dev);
+	if (!handle)
+		return -EINVAL;
+
+	/* Walk through the tree for all PMU UNCORE devices */
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				     tx2_uncore_pmu_add,
+				     NULL, dev, NULL);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to probe PMU devices\n");
+		return_ACPI_STATUS(status);
+	}
+
+	dev_info(dev, "node%d: pmu uncore registered\n", dev_to_node(dev));
+	return 0;
+}
+
+static int tx2_uncore_remove(struct platform_device *pdev)
+{
+	struct tx2_uncore_pmu *tx2_pmu, *temp;
+	struct device *dev = &pdev->dev;
+
+	if (!list_empty(&tx2_pmus)) {
+		list_for_each_entry_safe(tx2_pmu, temp, &tx2_pmus, entry) {
+			if (tx2_pmu->node == dev_to_node(dev)) {
+				cpuhp_state_remove_instance_nocalls(
+					CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
+					&tx2_pmu->hpnode);
+				perf_pmu_unregister(&tx2_pmu->pmu);
+				list_del(&tx2_pmu->entry);
+			}
+		}
+	}
+	return 0;
+}
+
+static struct platform_driver tx2_uncore_driver = {
+	.driver = {
+		.name		= "tx2-uncore-pmu",
+		.acpi_match_table = ACPI_PTR(tx2_uncore_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe = tx2_uncore_probe,
+	.remove = tx2_uncore_remove,
+};
+
+static int __init tx2_uncore_driver_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
+				      "perf/tx2/uncore:online",
+				      tx2_uncore_pmu_online_cpu,
+				      tx2_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("TX2 PMU: setup hotplug failed(%d)\n", ret);
+		return ret;
+	}
+	ret = platform_driver_register(&tx2_uncore_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE);
+
+	return ret;
+}
+module_init(tx2_uncore_driver_init);
+
+static void __exit tx2_uncore_driver_exit(void)
+{
+	platform_driver_unregister(&tx2_uncore_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE);
+}
+module_exit(tx2_uncore_driver_exit);
+
+MODULE_DESCRIPTION("ThunderX2 UNCORE PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Ganapatrao Kulkarni <gkulkarni@cavium.com>");
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
new file mode 100644
index 0000000000..9972bfc11a
--- /dev/null
+++ b/drivers/perf/xgene_pmu.c
@@ -0,0 +1,1976 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * APM X-Gene SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2016, Applied Micro Circuits Corporation
+ * Author: Hoan Tran <hotran@apm.com>
+ *         Tai Nguyen <ttnguyen@apm.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/clk.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define CSW_CSWCR                       0x0000
+#define  CSW_CSWCR_DUALMCB_MASK         BIT(0)
+#define  CSW_CSWCR_MCB0_ROUTING(x)	(((x) & 0x0C) >> 2)
+#define  CSW_CSWCR_MCB1_ROUTING(x)	(((x) & 0x30) >> 4)
+#define MCBADDRMR                       0x0000
+#define  MCBADDRMR_DUALMCU_MODE_MASK    BIT(2)
+
+#define PCPPMU_INTSTATUS_REG	0x000
+#define PCPPMU_INTMASK_REG	0x004
+#define  PCPPMU_INTMASK		0x0000000F
+#define  PCPPMU_INTENMASK	0xFFFFFFFF
+#define  PCPPMU_INTCLRMASK	0xFFFFFFF0
+#define  PCPPMU_INT_MCU		BIT(0)
+#define  PCPPMU_INT_MCB		BIT(1)
+#define  PCPPMU_INT_L3C		BIT(2)
+#define  PCPPMU_INT_IOB		BIT(3)
+
+#define  PCPPMU_V3_INTMASK	0x00FF33FF
+#define  PCPPMU_V3_INTENMASK	0xFFFFFFFF
+#define  PCPPMU_V3_INTCLRMASK	0xFF00CC00
+#define  PCPPMU_V3_INT_MCU	0x000000FF
+#define  PCPPMU_V3_INT_MCB	0x00000300
+#define  PCPPMU_V3_INT_L3C	0x00FF0000
+#define  PCPPMU_V3_INT_IOB	0x00003000
+
+#define PMU_MAX_COUNTERS	4
+#define PMU_CNT_MAX_PERIOD	0xFFFFFFFFULL
+#define PMU_V3_CNT_MAX_PERIOD	0xFFFFFFFFFFFFFFFFULL
+#define PMU_OVERFLOW_MASK	0xF
+#define PMU_PMCR_E		BIT(0)
+#define PMU_PMCR_P		BIT(1)
+
+#define PMU_PMEVCNTR0		0x000
+#define PMU_PMEVCNTR1		0x004
+#define PMU_PMEVCNTR2		0x008
+#define PMU_PMEVCNTR3		0x00C
+#define PMU_PMEVTYPER0		0x400
+#define PMU_PMEVTYPER1		0x404
+#define PMU_PMEVTYPER2		0x408
+#define PMU_PMEVTYPER3		0x40C
+#define PMU_PMAMR0		0xA00
+#define PMU_PMAMR1		0xA04
+#define PMU_PMCNTENSET		0xC00
+#define PMU_PMCNTENCLR		0xC20
+#define PMU_PMINTENSET		0xC40
+#define PMU_PMINTENCLR		0xC60
+#define PMU_PMOVSR		0xC80
+#define PMU_PMCR		0xE04
+
+/* PMU registers for V3 */
+#define PMU_PMOVSCLR		0xC80
+#define PMU_PMOVSSET		0xCC0
+
+#define to_pmu_dev(p)     container_of(p, struct xgene_pmu_dev, pmu)
+#define GET_CNTR(ev)      (ev->hw.idx)
+#define GET_EVENTID(ev)   (ev->hw.config & 0xFFULL)
+#define GET_AGENTID(ev)   (ev->hw.config_base & 0xFFFFFFFFUL)
+#define GET_AGENT1ID(ev)  ((ev->hw.config_base >> 32) & 0xFFFFFFFFUL)
+
+struct hw_pmu_info {
+	u32 type;
+	u32 enable_mask;
+	void __iomem *csr;
+};
+
+struct xgene_pmu_dev {
+	struct hw_pmu_info *inf;
+	struct xgene_pmu *parent;
+	struct pmu pmu;
+	u8 max_counters;
+	DECLARE_BITMAP(cntr_assign_mask, PMU_MAX_COUNTERS);
+	u64 max_period;
+	const struct attribute_group **attr_groups;
+	struct perf_event *pmu_counter_event[PMU_MAX_COUNTERS];
+};
+
+struct xgene_pmu_ops {
+	void (*mask_int)(struct xgene_pmu *pmu);
+	void (*unmask_int)(struct xgene_pmu *pmu);
+	u64 (*read_counter)(struct xgene_pmu_dev *pmu, int idx);
+	void (*write_counter)(struct xgene_pmu_dev *pmu, int idx, u64 val);
+	void (*write_evttype)(struct xgene_pmu_dev *pmu_dev, int idx, u32 val);
+	void (*write_agentmsk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+	void (*write_agent1msk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+	void (*enable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*disable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*enable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*disable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*reset_counters)(struct xgene_pmu_dev *pmu_dev);
+	void (*start_counters)(struct xgene_pmu_dev *pmu_dev);
+	void (*stop_counters)(struct xgene_pmu_dev *pmu_dev);
+};
+
+struct xgene_pmu {
+	struct device *dev;
+	struct hlist_node node;
+	int version;
+	void __iomem *pcppmu_csr;
+	u32 mcb_active_mask;
+	u32 mc_active_mask;
+	u32 l3c_active_mask;
+	cpumask_t cpu;
+	int irq;
+	raw_spinlock_t lock;
+	const struct xgene_pmu_ops *ops;
+	struct list_head l3cpmus;
+	struct list_head iobpmus;
+	struct list_head mcbpmus;
+	struct list_head mcpmus;
+};
+
+struct xgene_pmu_dev_ctx {
+	char *name;
+	struct list_head next;
+	struct xgene_pmu_dev *pmu_dev;
+	struct hw_pmu_info inf;
+};
+
+struct xgene_pmu_data {
+	int id;
+	u32 data;
+};
+
+enum xgene_pmu_version {
+	PCP_PMU_V1 = 1,
+	PCP_PMU_V2,
+	PCP_PMU_V3,
+};
+
+enum xgene_pmu_dev_type {
+	PMU_TYPE_L3C = 0,
+	PMU_TYPE_IOB,
+	PMU_TYPE_IOB_SLOW,
+	PMU_TYPE_MCB,
+	PMU_TYPE_MC,
+};
+
+/*
+ * sysfs format attributes
+ */
+static ssize_t xgene_pmu_format_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return sysfs_emit(buf, "%s\n", (char *) eattr->var);
+}
+
+#define XGENE_PMU_FORMAT_ATTR(_name, _config)		\
+	(&((struct dev_ext_attribute[]) {		\
+		{ .attr = __ATTR(_name, S_IRUGO, xgene_pmu_format_show, NULL), \
+		  .var = (void *) _config, }		\
+	})[0].attr.attr)
+
+static struct attribute *l3c_pmu_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-7"),
+	XGENE_PMU_FORMAT_ATTR(l3c_agentid, "config1:0-9"),
+	NULL,
+};
+
+static struct attribute *iob_pmu_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-7"),
+	XGENE_PMU_FORMAT_ATTR(iob_agentid, "config1:0-63"),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-5"),
+	XGENE_PMU_FORMAT_ATTR(mcb_agentid, "config1:0-9"),
+	NULL,
+};
+
+static struct attribute *mc_pmu_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-28"),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = l3c_pmu_format_attrs,
+};
+
+static const struct attribute_group iob_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = iob_pmu_format_attrs,
+};
+
+static const struct attribute_group mcb_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = mcb_pmu_format_attrs,
+};
+
+static const struct attribute_group mc_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = mc_pmu_format_attrs,
+};
+
+static struct attribute *l3c_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-39"),
+	NULL,
+};
+
+static struct attribute *iob_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-47"),
+	NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(iob_slow_eventid, "config:0-16"),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-35"),
+	NULL,
+};
+
+static struct attribute *mc_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-44"),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = l3c_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = iob_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = iob_slow_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = mcb_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = mc_pmu_v3_format_attrs,
+};
+
+/*
+ * sysfs event attributes
+ */
+static ssize_t xgene_pmu_event_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sysfs_emit(buf, "config=0x%llx\n", pmu_attr->id);
+}
+
+#define XGENE_PMU_EVENT_ATTR(_name, _config)		\
+	PMU_EVENT_ATTR_ID(_name, xgene_pmu_event_show, _config)
+
+static struct attribute *l3c_pmu_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(cycle-count-div-64,		0x01),
+	XGENE_PMU_EVENT_ATTR(read-hit,				0x02),
+	XGENE_PMU_EVENT_ATTR(read-miss,				0x03),
+	XGENE_PMU_EVENT_ATTR(write-need-replacement,		0x06),
+	XGENE_PMU_EVENT_ATTR(write-not-need-replacement,	0x07),
+	XGENE_PMU_EVENT_ATTR(tq-full,				0x08),
+	XGENE_PMU_EVENT_ATTR(ackq-full,				0x09),
+	XGENE_PMU_EVENT_ATTR(wdb-full,				0x0a),
+	XGENE_PMU_EVENT_ATTR(bank-fifo-full,			0x0b),
+	XGENE_PMU_EVENT_ATTR(odb-full,				0x0c),
+	XGENE_PMU_EVENT_ATTR(wbq-full,				0x0d),
+	XGENE_PMU_EVENT_ATTR(bank-conflict-fifo-issue,		0x0e),
+	XGENE_PMU_EVENT_ATTR(bank-fifo-issue,			0x0f),
+	NULL,
+};
+
+static struct attribute *iob_pmu_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(cycle-count-div-64,		0x01),
+	XGENE_PMU_EVENT_ATTR(axi0-read,				0x02),
+	XGENE_PMU_EVENT_ATTR(axi0-read-partial,			0x03),
+	XGENE_PMU_EVENT_ATTR(axi1-read,				0x04),
+	XGENE_PMU_EVENT_ATTR(axi1-read-partial,			0x05),
+	XGENE_PMU_EVENT_ATTR(csw-read-block,			0x06),
+	XGENE_PMU_EVENT_ATTR(csw-read-partial,			0x07),
+	XGENE_PMU_EVENT_ATTR(axi0-write,			0x10),
+	XGENE_PMU_EVENT_ATTR(axi0-write-partial,		0x11),
+	XGENE_PMU_EVENT_ATTR(axi1-write,			0x13),
+	XGENE_PMU_EVENT_ATTR(axi1-write-partial,		0x14),
+	XGENE_PMU_EVENT_ATTR(csw-inbound-dirty,			0x16),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(cycle-count-div-64,		0x01),
+	XGENE_PMU_EVENT_ATTR(csw-read,				0x02),
+	XGENE_PMU_EVENT_ATTR(csw-write-request,			0x03),
+	XGENE_PMU_EVENT_ATTR(mcb-csw-stall,			0x04),
+	XGENE_PMU_EVENT_ATTR(cancel-read-gack,			0x05),
+	NULL,
+};
+
+static struct attribute *mc_pmu_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(cycle-count-div-64,		0x01),
+	XGENE_PMU_EVENT_ATTR(act-cmd-sent,			0x02),
+	XGENE_PMU_EVENT_ATTR(pre-cmd-sent,			0x03),
+	XGENE_PMU_EVENT_ATTR(rd-cmd-sent,			0x04),
+	XGENE_PMU_EVENT_ATTR(rda-cmd-sent,			0x05),
+	XGENE_PMU_EVENT_ATTR(wr-cmd-sent,			0x06),
+	XGENE_PMU_EVENT_ATTR(wra-cmd-sent,			0x07),
+	XGENE_PMU_EVENT_ATTR(pde-cmd-sent,			0x08),
+	XGENE_PMU_EVENT_ATTR(sre-cmd-sent,			0x09),
+	XGENE_PMU_EVENT_ATTR(prea-cmd-sent,			0x0a),
+	XGENE_PMU_EVENT_ATTR(ref-cmd-sent,			0x0b),
+	XGENE_PMU_EVENT_ATTR(rd-rda-cmd-sent,			0x0c),
+	XGENE_PMU_EVENT_ATTR(wr-wra-cmd-sent,			0x0d),
+	XGENE_PMU_EVENT_ATTR(in-rd-collision,			0x0e),
+	XGENE_PMU_EVENT_ATTR(in-wr-collision,			0x0f),
+	XGENE_PMU_EVENT_ATTR(collision-queue-not-empty,		0x10),
+	XGENE_PMU_EVENT_ATTR(collision-queue-full,		0x11),
+	XGENE_PMU_EVENT_ATTR(mcu-request,			0x12),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-request,			0x13),
+	XGENE_PMU_EVENT_ATTR(mcu-hp-rd-request,			0x14),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-request,			0x15),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-proceed-all,		0x16),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-proceed-cancel,		0x17),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-response,			0x18),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-proceed-speculative-all,	0x19),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-proceed-speculative-cancel,	0x1a),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-proceed-all,		0x1b),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-proceed-cancel,		0x1c),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = l3c_pmu_events_attrs,
+};
+
+static const struct attribute_group iob_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = iob_pmu_events_attrs,
+};
+
+static const struct attribute_group mcb_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = mcb_pmu_events_attrs,
+};
+
+static const struct attribute_group mc_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = mc_pmu_events_attrs,
+};
+
+static struct attribute *l3c_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(read-hit,				0x01),
+	XGENE_PMU_EVENT_ATTR(read-miss,				0x02),
+	XGENE_PMU_EVENT_ATTR(index-flush-eviction,		0x03),
+	XGENE_PMU_EVENT_ATTR(write-caused-replacement,		0x04),
+	XGENE_PMU_EVENT_ATTR(write-not-caused-replacement,	0x05),
+	XGENE_PMU_EVENT_ATTR(clean-eviction,			0x06),
+	XGENE_PMU_EVENT_ATTR(dirty-eviction,			0x07),
+	XGENE_PMU_EVENT_ATTR(read,				0x08),
+	XGENE_PMU_EVENT_ATTR(write,				0x09),
+	XGENE_PMU_EVENT_ATTR(request,				0x0a),
+	XGENE_PMU_EVENT_ATTR(tq-bank-conflict-issue-stall,	0x0b),
+	XGENE_PMU_EVENT_ATTR(tq-full,				0x0c),
+	XGENE_PMU_EVENT_ATTR(ackq-full,				0x0d),
+	XGENE_PMU_EVENT_ATTR(wdb-full,				0x0e),
+	XGENE_PMU_EVENT_ATTR(odb-full,				0x10),
+	XGENE_PMU_EVENT_ATTR(wbq-full,				0x11),
+	XGENE_PMU_EVENT_ATTR(input-req-async-fifo-stall,	0x12),
+	XGENE_PMU_EVENT_ATTR(output-req-async-fifo-stall,	0x13),
+	XGENE_PMU_EVENT_ATTR(output-data-async-fifo-stall,	0x14),
+	XGENE_PMU_EVENT_ATTR(total-insertion,			0x15),
+	XGENE_PMU_EVENT_ATTR(sip-insertions-r-set,		0x16),
+	XGENE_PMU_EVENT_ATTR(sip-insertions-r-clear,		0x17),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-r-set,		0x18),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-r-clear,		0x19),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-force-r-set,	0x1a),
+	XGENE_PMU_EVENT_ATTR(egression,				0x1b),
+	XGENE_PMU_EVENT_ATTR(replacement,			0x1c),
+	XGENE_PMU_EVENT_ATTR(old-replacement,			0x1d),
+	XGENE_PMU_EVENT_ATTR(young-replacement,			0x1e),
+	XGENE_PMU_EVENT_ATTR(r-set-replacement,			0x1f),
+	XGENE_PMU_EVENT_ATTR(r-clear-replacement,		0x20),
+	XGENE_PMU_EVENT_ATTR(old-r-replacement,			0x21),
+	XGENE_PMU_EVENT_ATTR(old-nr-replacement,		0x22),
+	XGENE_PMU_EVENT_ATTR(young-r-replacement,		0x23),
+	XGENE_PMU_EVENT_ATTR(young-nr-replacement,		0x24),
+	XGENE_PMU_EVENT_ATTR(bloomfilter-clearing,		0x25),
+	XGENE_PMU_EVENT_ATTR(generation-flip,			0x26),
+	XGENE_PMU_EVENT_ATTR(vcc-droop-detected,		0x27),
+	NULL,
+};
+
+static struct attribute *iob_fast_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-all,		0x01),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-rd,		0x02),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-wr,		0x03),
+	XGENE_PMU_EVENT_ATTR(pa-all-cp-req,			0x04),
+	XGENE_PMU_EVENT_ATTR(pa-cp-blk-req,			0x05),
+	XGENE_PMU_EVENT_ATTR(pa-cp-ptl-req,			0x06),
+	XGENE_PMU_EVENT_ATTR(pa-cp-rd-req,			0x07),
+	XGENE_PMU_EVENT_ATTR(pa-cp-wr-req,			0x08),
+	XGENE_PMU_EVENT_ATTR(ba-all-req,			0x09),
+	XGENE_PMU_EVENT_ATTR(ba-rd-req,				0x0a),
+	XGENE_PMU_EVENT_ATTR(ba-wr-req,				0x0b),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-req-issued,		0x10),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-req-issued,	0x11),
+	XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-stashable, 0x12),
+	XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-nonstashable, 0x13),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-stashable,	0x14),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-nonstashable, 0x15),
+	XGENE_PMU_EVENT_ATTR(pa-ptl-wr-req,			0x16),
+	XGENE_PMU_EVENT_ATTR(pa-ptl-rd-req,			0x17),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-clean-data,		0x18),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-cancelled-on-SS,	0x1b),
+	XGENE_PMU_EVENT_ATTR(pa-barrier-occurrence,		0x1c),
+	XGENE_PMU_EVENT_ATTR(pa-barrier-cycles,			0x1d),
+	XGENE_PMU_EVENT_ATTR(pa-total-cp-snoops,		0x20),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop,		0x21),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop-hit,		0x22),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop,		0x23),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop-hit,		0x24),
+	XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop,		0x25),
+	XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop-hit,	0x26),
+	XGENE_PMU_EVENT_ATTR(pa-req-buffer-full,		0x28),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-req-fifo-full,	0x29),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-snoop-fifo-backpressure, 0x2a),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-lack-fifo-full,	0x2b),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-gack-fifo-backpressure, 0x2c),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-data-fifo-full,	0x2d),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-data-fifo-backpressure, 0x2e),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-req-backpressure,	0x2f),
+	NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(pa-axi0-rd-req,			0x01),
+	XGENE_PMU_EVENT_ATTR(pa-axi0-wr-req,			0x02),
+	XGENE_PMU_EVENT_ATTR(pa-axi1-rd-req,			0x03),
+	XGENE_PMU_EVENT_ATTR(pa-axi1-wr-req,			0x04),
+	XGENE_PMU_EVENT_ATTR(ba-all-axi-req,			0x07),
+	XGENE_PMU_EVENT_ATTR(ba-axi-rd-req,			0x08),
+	XGENE_PMU_EVENT_ATTR(ba-axi-wr-req,			0x09),
+	XGENE_PMU_EVENT_ATTR(ba-free-list-empty,		0x10),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(req-receive,			0x01),
+	XGENE_PMU_EVENT_ATTR(rd-req-recv,			0x02),
+	XGENE_PMU_EVENT_ATTR(rd-req-recv-2,			0x03),
+	XGENE_PMU_EVENT_ATTR(wr-req-recv,			0x04),
+	XGENE_PMU_EVENT_ATTR(wr-req-recv-2,			0x05),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu,		0x06),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu-2,		0x07),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu,		0x08),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu-2,		0x09),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-recv-for-rd-sent-to-spec-mcu, 0x0a),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-for-rd-sent-to-spec-mcu, 0x0b),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-nogo-recv-for-rd-sent-to-spec-mcu, 0x0c),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req,	0x0d),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req-2,	0x0e),
+	XGENE_PMU_EVENT_ATTR(wr-req-sent-to-mcu,		0x0f),
+	XGENE_PMU_EVENT_ATTR(gack-recv,				0x10),
+	XGENE_PMU_EVENT_ATTR(rd-gack-recv,			0x11),
+	XGENE_PMU_EVENT_ATTR(wr-gack-recv,			0x12),
+	XGENE_PMU_EVENT_ATTR(cancel-rd-gack,			0x13),
+	XGENE_PMU_EVENT_ATTR(cancel-wr-gack,			0x14),
+	XGENE_PMU_EVENT_ATTR(mcb-csw-req-stall,			0x15),
+	XGENE_PMU_EVENT_ATTR(mcu-req-intf-blocked,		0x16),
+	XGENE_PMU_EVENT_ATTR(mcb-mcu-rd-intf-stall,		0x17),
+	XGENE_PMU_EVENT_ATTR(csw-rd-intf-blocked,		0x18),
+	XGENE_PMU_EVENT_ATTR(csw-local-ack-intf-blocked,	0x19),
+	XGENE_PMU_EVENT_ATTR(mcu-req-table-full,		0x1a),
+	XGENE_PMU_EVENT_ATTR(mcu-stat-table-full,		0x1b),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-table-full,			0x1c),
+	XGENE_PMU_EVENT_ATTR(mcu-rdreceipt-resp,		0x1d),
+	XGENE_PMU_EVENT_ATTR(mcu-wrcomplete-resp,		0x1e),
+	XGENE_PMU_EVENT_ATTR(mcu-retryack-resp,			0x1f),
+	XGENE_PMU_EVENT_ATTR(mcu-pcrdgrant-resp,		0x20),
+	XGENE_PMU_EVENT_ATTR(mcu-req-from-lastload,		0x21),
+	XGENE_PMU_EVENT_ATTR(mcu-req-from-bypass,		0x22),
+	XGENE_PMU_EVENT_ATTR(volt-droop-detect,			0x23),
+	NULL,
+};
+
+static struct attribute *mc_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(act-sent,				0x01),
+	XGENE_PMU_EVENT_ATTR(pre-sent,				0x02),
+	XGENE_PMU_EVENT_ATTR(rd-sent,				0x03),
+	XGENE_PMU_EVENT_ATTR(rda-sent,				0x04),
+	XGENE_PMU_EVENT_ATTR(wr-sent,				0x05),
+	XGENE_PMU_EVENT_ATTR(wra-sent,				0x06),
+	XGENE_PMU_EVENT_ATTR(pd-entry-vld,			0x07),
+	XGENE_PMU_EVENT_ATTR(sref-entry-vld,			0x08),
+	XGENE_PMU_EVENT_ATTR(prea-sent,				0x09),
+	XGENE_PMU_EVENT_ATTR(ref-sent,				0x0a),
+	XGENE_PMU_EVENT_ATTR(rd-rda-sent,			0x0b),
+	XGENE_PMU_EVENT_ATTR(wr-wra-sent,			0x0c),
+	XGENE_PMU_EVENT_ATTR(raw-hazard,			0x0d),
+	XGENE_PMU_EVENT_ATTR(war-hazard,			0x0e),
+	XGENE_PMU_EVENT_ATTR(waw-hazard,			0x0f),
+	XGENE_PMU_EVENT_ATTR(rar-hazard,			0x10),
+	XGENE_PMU_EVENT_ATTR(raw-war-waw-hazard,		0x11),
+	XGENE_PMU_EVENT_ATTR(hprd-lprd-wr-req-vld,		0x12),
+	XGENE_PMU_EVENT_ATTR(lprd-req-vld,			0x13),
+	XGENE_PMU_EVENT_ATTR(hprd-req-vld,			0x14),
+	XGENE_PMU_EVENT_ATTR(hprd-lprd-req-vld,			0x15),
+	XGENE_PMU_EVENT_ATTR(wr-req-vld,			0x16),
+	XGENE_PMU_EVENT_ATTR(partial-wr-req-vld,		0x17),
+	XGENE_PMU_EVENT_ATTR(rd-retry,				0x18),
+	XGENE_PMU_EVENT_ATTR(wr-retry,				0x19),
+	XGENE_PMU_EVENT_ATTR(retry-gnt,				0x1a),
+	XGENE_PMU_EVENT_ATTR(rank-change,			0x1b),
+	XGENE_PMU_EVENT_ATTR(dir-change,			0x1c),
+	XGENE_PMU_EVENT_ATTR(rank-dir-change,			0x1d),
+	XGENE_PMU_EVENT_ATTR(rank-active,			0x1e),
+	XGENE_PMU_EVENT_ATTR(rank-idle,				0x1f),
+	XGENE_PMU_EVENT_ATTR(rank-pd,				0x20),
+	XGENE_PMU_EVENT_ATTR(rank-sref,				0x21),
+	XGENE_PMU_EVENT_ATTR(queue-fill-gt-thresh,		0x22),
+	XGENE_PMU_EVENT_ATTR(queue-rds-gt-thresh,		0x23),
+	XGENE_PMU_EVENT_ATTR(queue-wrs-gt-thresh,		0x24),
+	XGENE_PMU_EVENT_ATTR(phy-updt-complt,			0x25),
+	XGENE_PMU_EVENT_ATTR(tz-fail,				0x26),
+	XGENE_PMU_EVENT_ATTR(dram-errc,				0x27),
+	XGENE_PMU_EVENT_ATTR(dram-errd,				0x28),
+	XGENE_PMU_EVENT_ATTR(rd-enq,				0x29),
+	XGENE_PMU_EVENT_ATTR(wr-enq,				0x2a),
+	XGENE_PMU_EVENT_ATTR(tmac-limit-reached,		0x2b),
+	XGENE_PMU_EVENT_ATTR(tmaw-tracker-full,			0x2c),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = l3c_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_fast_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = iob_fast_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = iob_slow_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = mcb_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = mc_pmu_v3_events_attrs,
+};
+
+/*
+ * sysfs cpumask attributes
+ */
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &pmu_dev->parent->cpu);
+}
+
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *xgene_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group pmu_cpumask_attr_group = {
+	.attrs = xgene_pmu_cpumask_attrs,
+};
+
+/*
+ * Per PMU device attribute groups of PMU v1 and v2
+ */
+static const struct attribute_group *l3c_pmu_attr_groups[] = {
+	&l3c_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&l3c_pmu_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *iob_pmu_attr_groups[] = {
+	&iob_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&iob_pmu_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mcb_pmu_attr_groups[] = {
+	&mcb_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mcb_pmu_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mc_pmu_attr_groups[] = {
+	&mc_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mc_pmu_events_attr_group,
+	NULL
+};
+
+/*
+ * Per PMU device attribute groups of PMU v3
+ */
+static const struct attribute_group *l3c_pmu_v3_attr_groups[] = {
+	&l3c_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&l3c_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *iob_fast_pmu_v3_attr_groups[] = {
+	&iob_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&iob_fast_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *iob_slow_pmu_v3_attr_groups[] = {
+	&iob_slow_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&iob_slow_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mcb_pmu_v3_attr_groups[] = {
+	&mcb_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mcb_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mc_pmu_v3_attr_groups[] = {
+	&mc_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mc_pmu_v3_events_attr_group,
+	NULL
+};
+
+static int get_next_avail_cntr(struct xgene_pmu_dev *pmu_dev)
+{
+	int cntr;
+
+	cntr = find_first_zero_bit(pmu_dev->cntr_assign_mask,
+				pmu_dev->max_counters);
+	if (cntr == pmu_dev->max_counters)
+		return -ENOSPC;
+	set_bit(cntr, pmu_dev->cntr_assign_mask);
+
+	return cntr;
+}
+
+static void clear_avail_cntr(struct xgene_pmu_dev *pmu_dev, int cntr)
+{
+	clear_bit(cntr, pmu_dev->cntr_assign_mask);
+}
+
+static inline void xgene_pmu_mask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline void xgene_pmu_v3_mask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_V3_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline void xgene_pmu_v3_unmask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_V3_INTCLRMASK,
+	       xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
+					   int idx)
+{
+	return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
+}
+
+static inline u64 xgene_pmu_read_counter64(struct xgene_pmu_dev *pmu_dev,
+					   int idx)
+{
+	u32 lo, hi;
+
+	/*
+	 * v3 has 64-bit counter registers composed by 2 32-bit registers
+	 * This can be a problem if the counter increases and carries
+	 * out of bit [31] between 2 reads. The extra reads would help
+	 * to prevent this issue.
+	 */
+	do {
+		hi = xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1);
+		lo = xgene_pmu_read_counter32(pmu_dev, 2 * idx);
+	} while (hi != xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1));
+
+	return (((u64)hi << 32) | lo);
+}
+
+static inline void
+xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
+{
+	writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
+}
+
+static inline void
+xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
+{
+	u32 cnt_lo, cnt_hi;
+
+	cnt_hi = upper_32_bits(val);
+	cnt_lo = lower_32_bits(val);
+
+	/* v3 has 64-bit counter registers composed by 2 32-bit registers */
+	xgene_pmu_write_counter32(pmu_dev, 2 * idx, cnt_lo);
+	xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, cnt_hi);
+}
+
+static inline void
+xgene_pmu_write_evttype(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
+{
+	writel(val, pmu_dev->inf->csr + PMU_PMEVTYPER0 + (4 * idx));
+}
+
+static inline void
+xgene_pmu_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val)
+{
+	writel(val, pmu_dev->inf->csr + PMU_PMAMR0);
+}
+
+static inline void
+xgene_pmu_v3_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
+static inline void
+xgene_pmu_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val)
+{
+	writel(val, pmu_dev->inf->csr + PMU_PMAMR1);
+}
+
+static inline void
+xgene_pmu_v3_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
+static inline void
+xgene_pmu_enable_counter(struct xgene_pmu_dev *pmu_dev, int idx)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCNTENSET);
+	val |= 1 << idx;
+	writel(val, pmu_dev->inf->csr + PMU_PMCNTENSET);
+}
+
+static inline void
+xgene_pmu_disable_counter(struct xgene_pmu_dev *pmu_dev, int idx)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCNTENCLR);
+	val |= 1 << idx;
+	writel(val, pmu_dev->inf->csr + PMU_PMCNTENCLR);
+}
+
+static inline void
+xgene_pmu_enable_counter_int(struct xgene_pmu_dev *pmu_dev, int idx)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMINTENSET);
+	val |= 1 << idx;
+	writel(val, pmu_dev->inf->csr + PMU_PMINTENSET);
+}
+
+static inline void
+xgene_pmu_disable_counter_int(struct xgene_pmu_dev *pmu_dev, int idx)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMINTENCLR);
+	val |= 1 << idx;
+	writel(val, pmu_dev->inf->csr + PMU_PMINTENCLR);
+}
+
+static inline void xgene_pmu_reset_counters(struct xgene_pmu_dev *pmu_dev)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCR);
+	val |= PMU_PMCR_P;
+	writel(val, pmu_dev->inf->csr + PMU_PMCR);
+}
+
+static inline void xgene_pmu_start_counters(struct xgene_pmu_dev *pmu_dev)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCR);
+	val |= PMU_PMCR_E;
+	writel(val, pmu_dev->inf->csr + PMU_PMCR);
+}
+
+static inline void xgene_pmu_stop_counters(struct xgene_pmu_dev *pmu_dev)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCR);
+	val &= ~PMU_PMCR_E;
+	writel(val, pmu_dev->inf->csr + PMU_PMCR);
+}
+
+static void xgene_perf_pmu_enable(struct pmu *pmu)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	bool enabled = !bitmap_empty(pmu_dev->cntr_assign_mask,
+			pmu_dev->max_counters);
+
+	if (!enabled)
+		return;
+
+	xgene_pmu->ops->start_counters(pmu_dev);
+}
+
+static void xgene_perf_pmu_disable(struct pmu *pmu)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+
+	xgene_pmu->ops->stop_counters(pmu_dev);
+}
+
+static int xgene_perf_event_init(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	struct perf_event *sibling;
+
+	/* Test the event attr type check for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * SOC PMU counters are shared across all cores.
+	 * Therefore, it does not support per-process mode.
+	 * Also, it does not support event sampling mode.
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+	/*
+	 * Many perf core operations (eg. events rotation) operate on a
+	 * single CPU context. This is obvious for CPU PMUs, where one
+	 * expects the same sets of events being observed on all CPUs,
+	 * but can lead to issues for off-core PMUs, where each
+	 * event could be theoretically assigned to a different CPU. To
+	 * mitigate this, we enforce CPU assignment to one, selected
+	 * processor (the one described in the "cpumask" attribute).
+	 */
+	event->cpu = cpumask_first(&pmu_dev->parent->cpu);
+
+	hw->config = event->attr.config;
+	/*
+	 * Each bit of the config1 field represents an agent from which the
+	 * request of the event come. The event is counted only if it's caused
+	 * by a request of an agent has the bit cleared.
+	 * By default, the event is counted for all agents.
+	 */
+	hw->config_base = event->attr.config1;
+
+	/*
+	 * We must NOT create groups containing mixed PMUs, although software
+	 * events are acceptable
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void xgene_perf_enable_event(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+
+	xgene_pmu->ops->write_evttype(pmu_dev, GET_CNTR(event),
+				      GET_EVENTID(event));
+	xgene_pmu->ops->write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
+	if (pmu_dev->inf->type == PMU_TYPE_IOB)
+		xgene_pmu->ops->write_agent1msk(pmu_dev,
+						~((u32)GET_AGENT1ID(event)));
+
+	xgene_pmu->ops->enable_counter(pmu_dev, GET_CNTR(event));
+	xgene_pmu->ops->enable_counter_int(pmu_dev, GET_CNTR(event));
+}
+
+static void xgene_perf_disable_event(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+
+	xgene_pmu->ops->disable_counter(pmu_dev, GET_CNTR(event));
+	xgene_pmu->ops->disable_counter_int(pmu_dev, GET_CNTR(event));
+}
+
+static void xgene_perf_event_set_period(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	struct hw_perf_event *hw = &event->hw;
+	/*
+	 * For 32 bit counter, it has a period of 2^32. To account for the
+	 * possibility of extreme interrupt latency we program for a period of
+	 * half that. Hopefully, we can handle the interrupt before another 2^31
+	 * events occur and the counter overtakes its previous value.
+	 * For 64 bit counter, we don't expect it overflow.
+	 */
+	u64 val = 1ULL << 31;
+
+	local64_set(&hw->prev_count, val);
+	xgene_pmu->ops->write_counter(pmu_dev, hw->idx, val);
+}
+
+static void xgene_perf_event_update(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	struct hw_perf_event *hw = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hw->prev_count);
+	new_raw_count = xgene_pmu->ops->read_counter(pmu_dev, GET_CNTR(event));
+
+	if (local64_cmpxchg(&hw->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count - prev_raw_count) & pmu_dev->max_period;
+
+	local64_add(delta, &event->count);
+}
+
+static void xgene_perf_read(struct perf_event *event)
+{
+	xgene_perf_event_update(event);
+}
+
+static void xgene_perf_start(struct perf_event *event, int flags)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
+	hw->state = 0;
+
+	xgene_perf_event_set_period(event);
+
+	if (flags & PERF_EF_RELOAD) {
+		u64 prev_raw_count =  local64_read(&hw->prev_count);
+
+		xgene_pmu->ops->write_counter(pmu_dev, GET_CNTR(event),
+					      prev_raw_count);
+	}
+
+	xgene_perf_enable_event(event);
+	perf_event_update_userpage(event);
+}
+
+static void xgene_perf_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (hw->state & PERF_HES_UPTODATE)
+		return;
+
+	xgene_perf_disable_event(event);
+	WARN_ON_ONCE(hw->state & PERF_HES_STOPPED);
+	hw->state |= PERF_HES_STOPPED;
+
+	if (hw->state & PERF_HES_UPTODATE)
+		return;
+
+	xgene_perf_read(event);
+	hw->state |= PERF_HES_UPTODATE;
+}
+
+static int xgene_perf_add(struct perf_event *event, int flags)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+
+	hw->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* Allocate an event counter */
+	hw->idx = get_next_avail_cntr(pmu_dev);
+	if (hw->idx < 0)
+		return -EAGAIN;
+
+	/* Update counter event pointer for Interrupt handler */
+	pmu_dev->pmu_counter_event[hw->idx] = event;
+
+	if (flags & PERF_EF_START)
+		xgene_perf_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void xgene_perf_del(struct perf_event *event, int flags)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+
+	xgene_perf_stop(event, PERF_EF_UPDATE);
+
+	/* clear the assigned counter */
+	clear_avail_cntr(pmu_dev, GET_CNTR(event));
+
+	perf_event_update_userpage(event);
+	pmu_dev->pmu_counter_event[hw->idx] = NULL;
+}
+
+static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
+{
+	struct xgene_pmu *xgene_pmu;
+
+	if (pmu_dev->parent->version == PCP_PMU_V3)
+		pmu_dev->max_period = PMU_V3_CNT_MAX_PERIOD;
+	else
+		pmu_dev->max_period = PMU_CNT_MAX_PERIOD;
+	/* First version PMU supports only single event counter */
+	xgene_pmu = pmu_dev->parent;
+	if (xgene_pmu->version == PCP_PMU_V1)
+		pmu_dev->max_counters = 1;
+	else
+		pmu_dev->max_counters = PMU_MAX_COUNTERS;
+
+	/* Perf driver registration */
+	pmu_dev->pmu = (struct pmu) {
+		.attr_groups	= pmu_dev->attr_groups,
+		.task_ctx_nr	= perf_invalid_context,
+		.pmu_enable	= xgene_perf_pmu_enable,
+		.pmu_disable	= xgene_perf_pmu_disable,
+		.event_init	= xgene_perf_event_init,
+		.add		= xgene_perf_add,
+		.del		= xgene_perf_del,
+		.start		= xgene_perf_start,
+		.stop		= xgene_perf_stop,
+		.read		= xgene_perf_read,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	/* Hardware counter init */
+	xgene_pmu->ops->stop_counters(pmu_dev);
+	xgene_pmu->ops->reset_counters(pmu_dev);
+
+	return perf_pmu_register(&pmu_dev->pmu, name, -1);
+}
+
+static int
+xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
+{
+	struct device *dev = xgene_pmu->dev;
+	struct xgene_pmu_dev *pmu;
+
+	pmu = devm_kzalloc(dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+	pmu->parent = xgene_pmu;
+	pmu->inf = &ctx->inf;
+	ctx->pmu_dev = pmu;
+
+	switch (pmu->inf->type) {
+	case PMU_TYPE_L3C:
+		if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask))
+			return -ENODEV;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = l3c_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = l3c_pmu_attr_groups;
+		break;
+	case PMU_TYPE_IOB:
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = iob_fast_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = iob_pmu_attr_groups;
+		break;
+	case PMU_TYPE_IOB_SLOW:
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = iob_slow_pmu_v3_attr_groups;
+		break;
+	case PMU_TYPE_MCB:
+		if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
+			return -ENODEV;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = mcb_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = mcb_pmu_attr_groups;
+		break;
+	case PMU_TYPE_MC:
+		if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
+			return -ENODEV;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = mc_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = mc_pmu_attr_groups;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (xgene_init_perf(pmu, ctx->name)) {
+		dev_err(dev, "%s PMU: Failed to init perf driver\n", ctx->name);
+		return -ENODEV;
+	}
+
+	dev_info(dev, "%s PMU registered\n", ctx->name);
+
+	return 0;
+}
+
+static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
+{
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	void __iomem *csr = pmu_dev->inf->csr;
+	u32 pmovsr;
+	int idx;
+
+	xgene_pmu->ops->stop_counters(pmu_dev);
+
+	if (xgene_pmu->version == PCP_PMU_V3)
+		pmovsr = readl(csr + PMU_PMOVSSET) & PMU_OVERFLOW_MASK;
+	else
+		pmovsr = readl(csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
+
+	if (!pmovsr)
+		goto out;
+
+	/* Clear interrupt flag */
+	if (xgene_pmu->version == PCP_PMU_V1)
+		writel(0x0, csr + PMU_PMOVSR);
+	else if (xgene_pmu->version == PCP_PMU_V2)
+		writel(pmovsr, csr + PMU_PMOVSR);
+	else
+		writel(pmovsr, csr + PMU_PMOVSCLR);
+
+	for (idx = 0; idx < PMU_MAX_COUNTERS; idx++) {
+		struct perf_event *event = pmu_dev->pmu_counter_event[idx];
+		int overflowed = pmovsr & BIT(idx);
+
+		/* Ignore if we don't have an event. */
+		if (!event || !overflowed)
+			continue;
+		xgene_perf_event_update(event);
+		xgene_perf_event_set_period(event);
+	}
+
+out:
+	xgene_pmu->ops->start_counters(pmu_dev);
+}
+
+static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
+{
+	u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
+	struct xgene_pmu_dev_ctx *ctx;
+	struct xgene_pmu *xgene_pmu = dev_id;
+	u32 val;
+
+	raw_spin_lock(&xgene_pmu->lock);
+
+	/* Get Interrupt PMU source */
+	val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
+	if (xgene_pmu->version == PCP_PMU_V3) {
+		intr_mcu = PCPPMU_V3_INT_MCU;
+		intr_mcb = PCPPMU_V3_INT_MCB;
+		intr_l3c = PCPPMU_V3_INT_L3C;
+		intr_iob = PCPPMU_V3_INT_IOB;
+	} else {
+		intr_mcu = PCPPMU_INT_MCU;
+		intr_mcb = PCPPMU_INT_MCB;
+		intr_l3c = PCPPMU_INT_L3C;
+		intr_iob = PCPPMU_INT_IOB;
+	}
+	if (val & intr_mcu) {
+		list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) {
+			_xgene_pmu_isr(irq, ctx->pmu_dev);
+		}
+	}
+	if (val & intr_mcb) {
+		list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) {
+			_xgene_pmu_isr(irq, ctx->pmu_dev);
+		}
+	}
+	if (val & intr_l3c) {
+		list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) {
+			_xgene_pmu_isr(irq, ctx->pmu_dev);
+		}
+	}
+	if (val & intr_iob) {
+		list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) {
+			_xgene_pmu_isr(irq, ctx->pmu_dev);
+		}
+	}
+
+	raw_spin_unlock(&xgene_pmu->lock);
+
+	return IRQ_HANDLED;
+}
+
+static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					     struct platform_device *pdev)
+{
+	void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
+	unsigned int reg;
+
+	csw_csr = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(csw_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
+		return PTR_ERR(csw_csr);
+	}
+
+	mcba_csr = devm_platform_ioremap_resource(pdev, 2);
+	if (IS_ERR(mcba_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
+		return PTR_ERR(mcba_csr);
+	}
+
+	mcbb_csr = devm_platform_ioremap_resource(pdev, 3);
+	if (IS_ERR(mcbb_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
+		return PTR_ERR(mcbb_csr);
+	}
+
+	xgene_pmu->l3c_active_mask = 0x1;
+
+	reg = readl(csw_csr + CSW_CSWCR);
+	if (reg & CSW_CSWCR_DUALMCB_MASK) {
+		/* Dual MCB active */
+		xgene_pmu->mcb_active_mask = 0x3;
+		/* Probe all active MC(s) */
+		reg = readl(mcbb_csr + CSW_CSWCR);
+		xgene_pmu->mc_active_mask =
+			(reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
+	} else {
+		/* Single MCB active */
+		xgene_pmu->mcb_active_mask = 0x1;
+		/* Probe all active MC(s) */
+		reg = readl(mcba_csr + CSW_CSWCR);
+		xgene_pmu->mc_active_mask =
+			(reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
+	}
+
+	return 0;
+}
+
+static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+						struct platform_device *pdev)
+{
+	void __iomem *csw_csr;
+	unsigned int reg;
+	u32 mcb0routing;
+	u32 mcb1routing;
+
+	csw_csr = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(csw_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
+		return PTR_ERR(csw_csr);
+	}
+
+	reg = readl(csw_csr + CSW_CSWCR);
+	mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg);
+	mcb1routing = CSW_CSWCR_MCB1_ROUTING(reg);
+	if (reg & CSW_CSWCR_DUALMCB_MASK) {
+		/* Dual MCB active */
+		xgene_pmu->mcb_active_mask = 0x3;
+		/* Probe all active L3C(s), maximum is 8 */
+		xgene_pmu->l3c_active_mask = 0xFF;
+		/* Probe all active MC(s), maximum is 8 */
+		if ((mcb0routing == 0x2) && (mcb1routing == 0x2))
+			xgene_pmu->mc_active_mask = 0xFF;
+		else if ((mcb0routing == 0x1) && (mcb1routing == 0x1))
+			xgene_pmu->mc_active_mask =  0x33;
+		else
+			xgene_pmu->mc_active_mask =  0x11;
+	} else {
+		/* Single MCB active */
+		xgene_pmu->mcb_active_mask = 0x1;
+		/* Probe all active L3C(s), maximum is 4 */
+		xgene_pmu->l3c_active_mask = 0x0F;
+		/* Probe all active MC(s), maximum is 4 */
+		if (mcb0routing == 0x2)
+			xgene_pmu->mc_active_mask = 0x0F;
+		else if (mcb0routing == 0x1)
+			xgene_pmu->mc_active_mask =  0x03;
+		else
+			xgene_pmu->mc_active_mask =  0x01;
+	}
+
+	return 0;
+}
+
+static int fdt_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					    struct platform_device *pdev)
+{
+	struct regmap *csw_map, *mcba_map, *mcbb_map;
+	struct device_node *np = pdev->dev.of_node;
+	unsigned int reg;
+
+	csw_map = syscon_regmap_lookup_by_phandle(np, "regmap-csw");
+	if (IS_ERR(csw_map)) {
+		dev_err(&pdev->dev, "unable to get syscon regmap csw\n");
+		return PTR_ERR(csw_map);
+	}
+
+	mcba_map = syscon_regmap_lookup_by_phandle(np, "regmap-mcba");
+	if (IS_ERR(mcba_map)) {
+		dev_err(&pdev->dev, "unable to get syscon regmap mcba\n");
+		return PTR_ERR(mcba_map);
+	}
+
+	mcbb_map = syscon_regmap_lookup_by_phandle(np, "regmap-mcbb");
+	if (IS_ERR(mcbb_map)) {
+		dev_err(&pdev->dev, "unable to get syscon regmap mcbb\n");
+		return PTR_ERR(mcbb_map);
+	}
+
+	xgene_pmu->l3c_active_mask = 0x1;
+	if (regmap_read(csw_map, CSW_CSWCR, &reg))
+		return -EINVAL;
+
+	if (reg & CSW_CSWCR_DUALMCB_MASK) {
+		/* Dual MCB active */
+		xgene_pmu->mcb_active_mask = 0x3;
+		/* Probe all active MC(s) */
+		if (regmap_read(mcbb_map, MCBADDRMR, &reg))
+			return 0;
+		xgene_pmu->mc_active_mask =
+			(reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
+	} else {
+		/* Single MCB active */
+		xgene_pmu->mcb_active_mask = 0x1;
+		/* Probe all active MC(s) */
+		if (regmap_read(mcba_map, MCBADDRMR, &reg))
+			return 0;
+		xgene_pmu->mc_active_mask =
+			(reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
+	}
+
+	return 0;
+}
+
+static int xgene_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					      struct platform_device *pdev)
+{
+	if (has_acpi_companion(&pdev->dev)) {
+		if (xgene_pmu->version == PCP_PMU_V3)
+			return acpi_pmu_v3_probe_active_mcb_mcu_l3c(xgene_pmu,
+								    pdev);
+		else
+			return acpi_pmu_probe_active_mcb_mcu_l3c(xgene_pmu,
+								 pdev);
+	}
+	return fdt_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
+}
+
+static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
+{
+	switch (type) {
+	case PMU_TYPE_L3C:
+		return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id);
+	case PMU_TYPE_IOB:
+		return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id);
+	case PMU_TYPE_IOB_SLOW:
+		return devm_kasprintf(dev, GFP_KERNEL, "iob_slow%d", id);
+	case PMU_TYPE_MCB:
+		return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id);
+	case PMU_TYPE_MC:
+		return devm_kasprintf(dev, GFP_KERNEL, "mc%d", id);
+	default:
+		return devm_kasprintf(dev, GFP_KERNEL, "unknown");
+	}
+}
+
+#if defined(CONFIG_ACPI)
+static struct
+xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
+				       struct acpi_device *adev, u32 type)
+{
+	struct device *dev = xgene_pmu->dev;
+	struct list_head resource_list;
+	struct xgene_pmu_dev_ctx *ctx;
+	const union acpi_object *obj;
+	struct hw_pmu_info *inf;
+	void __iomem *dev_csr;
+	struct resource res;
+	struct resource_entry *rentry;
+	int enable_bit;
+	int rc;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&resource_list);
+	rc = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+	if (rc <= 0) {
+		dev_err(dev, "PMU type %d: No resources found\n", type);
+		return NULL;
+	}
+
+	list_for_each_entry(rentry, &resource_list, node) {
+		if (resource_type(rentry->res) == IORESOURCE_MEM) {
+			res = *rentry->res;
+			rentry = NULL;
+			break;
+		}
+	}
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (rentry) {
+		dev_err(dev, "PMU type %d: No memory resource found\n", type);
+		return NULL;
+	}
+
+	dev_csr = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(dev_csr)) {
+		dev_err(dev, "PMU type %d: Fail to map resource\n", type);
+		return NULL;
+	}
+
+	/* A PMU device node without enable-bit-index is always enabled */
+	rc = acpi_dev_get_property(adev, "enable-bit-index",
+				   ACPI_TYPE_INTEGER, &obj);
+	if (rc < 0)
+		enable_bit = 0;
+	else
+		enable_bit = (int) obj->integer.value;
+
+	ctx->name = xgene_pmu_dev_name(dev, type, enable_bit);
+	if (!ctx->name) {
+		dev_err(dev, "PMU type %d: Fail to get device name\n", type);
+		return NULL;
+	}
+	inf = &ctx->inf;
+	inf->type = type;
+	inf->csr = dev_csr;
+	inf->enable_mask = 1 << enable_bit;
+
+	return ctx;
+}
+
+static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
+	{"APMC0D5D", PMU_TYPE_L3C},
+	{"APMC0D5E", PMU_TYPE_IOB},
+	{"APMC0D5F", PMU_TYPE_MCB},
+	{"APMC0D60", PMU_TYPE_MC},
+	{"APMC0D84", PMU_TYPE_L3C},
+	{"APMC0D85", PMU_TYPE_IOB},
+	{"APMC0D86", PMU_TYPE_IOB_SLOW},
+	{"APMC0D87", PMU_TYPE_MCB},
+	{"APMC0D88", PMU_TYPE_MC},
+	{},
+};
+
+static const struct acpi_device_id *xgene_pmu_acpi_match_type(
+					const struct acpi_device_id *ids,
+					struct acpi_device *adev)
+{
+	const struct acpi_device_id *match_id = NULL;
+	const struct acpi_device_id *id;
+
+	for (id = ids; id->id[0] || id->cls; id++) {
+		if (!acpi_match_device_ids(adev, id))
+			match_id = id;
+		else if (match_id)
+			break;
+	}
+
+	return match_id;
+}
+
+static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
+				    void *data, void **return_value)
+{
+	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
+	const struct acpi_device_id *acpi_id;
+	struct xgene_pmu *xgene_pmu = data;
+	struct xgene_pmu_dev_ctx *ctx;
+
+	if (!adev || acpi_bus_get_status(adev) || !adev->status.present)
+		return AE_OK;
+
+	acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
+	if (!acpi_id)
+		return AE_OK;
+
+	ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data);
+	if (!ctx)
+		return AE_OK;
+
+	if (xgene_pmu_dev_add(xgene_pmu, ctx)) {
+		/* Can't add the PMU device, skip it */
+		devm_kfree(xgene_pmu->dev, ctx);
+		return AE_OK;
+	}
+
+	switch (ctx->inf.type) {
+	case PMU_TYPE_L3C:
+		list_add(&ctx->next, &xgene_pmu->l3cpmus);
+		break;
+	case PMU_TYPE_IOB:
+		list_add(&ctx->next, &xgene_pmu->iobpmus);
+		break;
+	case PMU_TYPE_IOB_SLOW:
+		list_add(&ctx->next, &xgene_pmu->iobpmus);
+		break;
+	case PMU_TYPE_MCB:
+		list_add(&ctx->next, &xgene_pmu->mcbpmus);
+		break;
+	case PMU_TYPE_MC:
+		list_add(&ctx->next, &xgene_pmu->mcpmus);
+		break;
+	}
+	return AE_OK;
+}
+
+static int acpi_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
+				  struct platform_device *pdev)
+{
+	struct device *dev = xgene_pmu->dev;
+	acpi_handle handle;
+	acpi_status status;
+
+	handle = ACPI_HANDLE(dev);
+	if (!handle)
+		return -EINVAL;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				     acpi_pmu_dev_add, NULL, xgene_pmu, NULL);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to probe PMU devices\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+#else
+static int acpi_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
+				  struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
+static struct
+xgene_pmu_dev_ctx *fdt_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
+				      struct device_node *np, u32 type)
+{
+	struct device *dev = xgene_pmu->dev;
+	struct xgene_pmu_dev_ctx *ctx;
+	struct hw_pmu_info *inf;
+	void __iomem *dev_csr;
+	struct resource res;
+	int enable_bit;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	if (of_address_to_resource(np, 0, &res) < 0) {
+		dev_err(dev, "PMU type %d: No resource address found\n", type);
+		return NULL;
+	}
+
+	dev_csr = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(dev_csr)) {
+		dev_err(dev, "PMU type %d: Fail to map resource\n", type);
+		return NULL;
+	}
+
+	/* A PMU device node without enable-bit-index is always enabled */
+	if (of_property_read_u32(np, "enable-bit-index", &enable_bit))
+		enable_bit = 0;
+
+	ctx->name = xgene_pmu_dev_name(dev, type, enable_bit);
+	if (!ctx->name) {
+		dev_err(dev, "PMU type %d: Fail to get device name\n", type);
+		return NULL;
+	}
+
+	inf = &ctx->inf;
+	inf->type = type;
+	inf->csr = dev_csr;
+	inf->enable_mask = 1 << enable_bit;
+
+	return ctx;
+}
+
+static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
+				 struct platform_device *pdev)
+{
+	struct xgene_pmu_dev_ctx *ctx;
+	struct device_node *np;
+
+	for_each_child_of_node(pdev->dev.of_node, np) {
+		if (!of_device_is_available(np))
+			continue;
+
+		if (of_device_is_compatible(np, "apm,xgene-pmu-l3c"))
+			ctx = fdt_get_pmu_hw_inf(xgene_pmu, np, PMU_TYPE_L3C);
+		else if (of_device_is_compatible(np, "apm,xgene-pmu-iob"))
+			ctx = fdt_get_pmu_hw_inf(xgene_pmu, np, PMU_TYPE_IOB);
+		else if (of_device_is_compatible(np, "apm,xgene-pmu-mcb"))
+			ctx = fdt_get_pmu_hw_inf(xgene_pmu, np, PMU_TYPE_MCB);
+		else if (of_device_is_compatible(np, "apm,xgene-pmu-mc"))
+			ctx = fdt_get_pmu_hw_inf(xgene_pmu, np, PMU_TYPE_MC);
+		else
+			ctx = NULL;
+
+		if (!ctx)
+			continue;
+
+		if (xgene_pmu_dev_add(xgene_pmu, ctx)) {
+			/* Can't add the PMU device, skip it */
+			devm_kfree(xgene_pmu->dev, ctx);
+			continue;
+		}
+
+		switch (ctx->inf.type) {
+		case PMU_TYPE_L3C:
+			list_add(&ctx->next, &xgene_pmu->l3cpmus);
+			break;
+		case PMU_TYPE_IOB:
+			list_add(&ctx->next, &xgene_pmu->iobpmus);
+			break;
+		case PMU_TYPE_IOB_SLOW:
+			list_add(&ctx->next, &xgene_pmu->iobpmus);
+			break;
+		case PMU_TYPE_MCB:
+			list_add(&ctx->next, &xgene_pmu->mcbpmus);
+			break;
+		case PMU_TYPE_MC:
+			list_add(&ctx->next, &xgene_pmu->mcpmus);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int xgene_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
+				   struct platform_device *pdev)
+{
+	if (has_acpi_companion(&pdev->dev))
+		return acpi_pmu_probe_pmu_dev(xgene_pmu, pdev);
+	return fdt_pmu_probe_pmu_dev(xgene_pmu, pdev);
+}
+
+static const struct xgene_pmu_data xgene_pmu_data = {
+	.id   = PCP_PMU_V1,
+};
+
+static const struct xgene_pmu_data xgene_pmu_v2_data = {
+	.id   = PCP_PMU_V2,
+};
+
+static const struct xgene_pmu_ops xgene_pmu_ops = {
+	.mask_int = xgene_pmu_mask_int,
+	.unmask_int = xgene_pmu_unmask_int,
+	.read_counter = xgene_pmu_read_counter32,
+	.write_counter = xgene_pmu_write_counter32,
+	.write_evttype = xgene_pmu_write_evttype,
+	.write_agentmsk = xgene_pmu_write_agentmsk,
+	.write_agent1msk = xgene_pmu_write_agent1msk,
+	.enable_counter = xgene_pmu_enable_counter,
+	.disable_counter = xgene_pmu_disable_counter,
+	.enable_counter_int = xgene_pmu_enable_counter_int,
+	.disable_counter_int = xgene_pmu_disable_counter_int,
+	.reset_counters = xgene_pmu_reset_counters,
+	.start_counters = xgene_pmu_start_counters,
+	.stop_counters = xgene_pmu_stop_counters,
+};
+
+static const struct xgene_pmu_ops xgene_pmu_v3_ops = {
+	.mask_int = xgene_pmu_v3_mask_int,
+	.unmask_int = xgene_pmu_v3_unmask_int,
+	.read_counter = xgene_pmu_read_counter64,
+	.write_counter = xgene_pmu_write_counter64,
+	.write_evttype = xgene_pmu_write_evttype,
+	.write_agentmsk = xgene_pmu_v3_write_agentmsk,
+	.write_agent1msk = xgene_pmu_v3_write_agent1msk,
+	.enable_counter = xgene_pmu_enable_counter,
+	.disable_counter = xgene_pmu_disable_counter,
+	.enable_counter_int = xgene_pmu_enable_counter_int,
+	.disable_counter_int = xgene_pmu_disable_counter_int,
+	.reset_counters = xgene_pmu_reset_counters,
+	.start_counters = xgene_pmu_start_counters,
+	.stop_counters = xgene_pmu_stop_counters,
+};
+
+static const struct of_device_id xgene_pmu_of_match[] = {
+	{ .compatible	= "apm,xgene-pmu",	.data = &xgene_pmu_data },
+	{ .compatible	= "apm,xgene-pmu-v2",	.data = &xgene_pmu_v2_data },
+	{},
+};
+MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgene_pmu_acpi_match[] = {
+	{"APMC0D5B", PCP_PMU_V1},
+	{"APMC0D5C", PCP_PMU_V2},
+	{"APMC0D83", PCP_PMU_V3},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
+#endif
+
+static int xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct xgene_pmu *xgene_pmu = hlist_entry_safe(node, struct xgene_pmu,
+						       node);
+
+	if (cpumask_empty(&xgene_pmu->cpu))
+		cpumask_set_cpu(cpu, &xgene_pmu->cpu);
+
+	/* Overflow interrupt also should use the same CPU */
+	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
+
+	return 0;
+}
+
+static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct xgene_pmu *xgene_pmu = hlist_entry_safe(node, struct xgene_pmu,
+						       node);
+	struct xgene_pmu_dev_ctx *ctx;
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &xgene_pmu->cpu))
+		return 0;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) {
+		perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target);
+	}
+	list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) {
+		perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target);
+	}
+	list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) {
+		perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target);
+	}
+	list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) {
+		perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target);
+	}
+
+	cpumask_set_cpu(target, &xgene_pmu->cpu);
+	/* Overflow interrupt also should use the same CPU */
+	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
+
+	return 0;
+}
+
+static int xgene_pmu_probe(struct platform_device *pdev)
+{
+	const struct xgene_pmu_data *dev_data;
+	const struct of_device_id *of_id;
+	struct xgene_pmu *xgene_pmu;
+	int irq, rc;
+	int version;
+
+	/* Install a hook to update the reader CPU in case it goes offline */
+	rc = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
+				      "CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE",
+				      xgene_pmu_online_cpu,
+				      xgene_pmu_offline_cpu);
+	if (rc)
+		return rc;
+
+	xgene_pmu = devm_kzalloc(&pdev->dev, sizeof(*xgene_pmu), GFP_KERNEL);
+	if (!xgene_pmu)
+		return -ENOMEM;
+	xgene_pmu->dev = &pdev->dev;
+	platform_set_drvdata(pdev, xgene_pmu);
+
+	version = -EINVAL;
+	of_id = of_match_device(xgene_pmu_of_match, &pdev->dev);
+	if (of_id) {
+		dev_data = (const struct xgene_pmu_data *) of_id->data;
+		version = dev_data->id;
+	}
+
+#ifdef CONFIG_ACPI
+	if (ACPI_COMPANION(&pdev->dev)) {
+		const struct acpi_device_id *acpi_id;
+
+		acpi_id = acpi_match_device(xgene_pmu_acpi_match, &pdev->dev);
+		if (acpi_id)
+			version = (int) acpi_id->driver_data;
+	}
+#endif
+	if (version < 0)
+		return -ENODEV;
+
+	if (version == PCP_PMU_V3)
+		xgene_pmu->ops = &xgene_pmu_v3_ops;
+	else
+		xgene_pmu->ops = &xgene_pmu_ops;
+
+	INIT_LIST_HEAD(&xgene_pmu->l3cpmus);
+	INIT_LIST_HEAD(&xgene_pmu->iobpmus);
+	INIT_LIST_HEAD(&xgene_pmu->mcbpmus);
+	INIT_LIST_HEAD(&xgene_pmu->mcpmus);
+
+	xgene_pmu->version = version;
+	dev_info(&pdev->dev, "X-Gene PMU version %d\n", xgene_pmu->version);
+
+	xgene_pmu->pcppmu_csr = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(xgene_pmu->pcppmu_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n");
+		return PTR_ERR(xgene_pmu->pcppmu_csr);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -EINVAL;
+
+	rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr,
+				IRQF_NOBALANCING | IRQF_NO_THREAD,
+				dev_name(&pdev->dev), xgene_pmu);
+	if (rc) {
+		dev_err(&pdev->dev, "Could not request IRQ %d\n", irq);
+		return rc;
+	}
+
+	xgene_pmu->irq = irq;
+
+	raw_spin_lock_init(&xgene_pmu->lock);
+
+	/* Check for active MCBs and MCUs */
+	rc = xgene_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
+	if (rc) {
+		dev_warn(&pdev->dev, "Unknown MCB/MCU active status\n");
+		xgene_pmu->mcb_active_mask = 0x1;
+		xgene_pmu->mc_active_mask = 0x1;
+	}
+
+	/* Add this instance to the list used by the hotplug callback */
+	rc = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
+				      &xgene_pmu->node);
+	if (rc) {
+		dev_err(&pdev->dev, "Error %d registering hotplug", rc);
+		return rc;
+	}
+
+	/* Walk through the tree for all PMU perf devices */
+	rc = xgene_pmu_probe_pmu_dev(xgene_pmu, pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "No PMU perf devices found!\n");
+		goto out_unregister;
+	}
+
+	/* Enable interrupt */
+	xgene_pmu->ops->unmask_int(xgene_pmu);
+
+	return 0;
+
+out_unregister:
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
+				    &xgene_pmu->node);
+	return rc;
+}
+
+static void
+xgene_pmu_dev_cleanup(struct xgene_pmu *xgene_pmu, struct list_head *pmus)
+{
+	struct xgene_pmu_dev_ctx *ctx;
+
+	list_for_each_entry(ctx, pmus, next) {
+		perf_pmu_unregister(&ctx->pmu_dev->pmu);
+	}
+}
+
+static int xgene_pmu_remove(struct platform_device *pdev)
+{
+	struct xgene_pmu *xgene_pmu = dev_get_drvdata(&pdev->dev);
+
+	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->l3cpmus);
+	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->iobpmus);
+	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcbpmus);
+	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
+				    &xgene_pmu->node);
+
+	return 0;
+}
+
+static struct platform_driver xgene_pmu_driver = {
+	.probe = xgene_pmu_probe,
+	.remove = xgene_pmu_remove,
+	.driver = {
+		.name		= "xgene-pmu",
+		.of_match_table = xgene_pmu_of_match,
+		.acpi_match_table = ACPI_PTR(xgene_pmu_acpi_match),
+		.suppress_bind_attrs = true,
+	},
+};
+
+builtin_platform_driver(xgene_pmu_driver);
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/perf
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip