summaryrefslogtreecommitdiffstats
path: root/tools/perf/pmu-events/arch/x86
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:50:36 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:50:36 +0000
commit50ba0232fd5312410f1b65247e774244f89a628e (patch)
treefd8f2fc78e9e548af0ff9590276602ee6125be00 /tools/perf/pmu-events/arch/x86
parentReleasing progress-linux version 6.7.12-1~progress7.99u1. (diff)
downloadlinux-50ba0232fd5312410f1b65247e774244f89a628e.tar.xz
linux-50ba0232fd5312410f1b65247e774244f89a628e.zip
Merging upstream version 6.8.9.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/perf/pmu-events/arch/x86')
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json269
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen4/cache.json56
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json101
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen4/recommended.json84
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json27
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json18
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv6
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json27
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json31
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json6
22 files changed, 523 insertions, 208 deletions
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index 3388b58b8f..bbfa3883e5 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -70,12 +70,6 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "Uncore frequency per die [GHZ]",
- "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
- "MetricGroup": "SoC",
- "MetricName": "UNCORE_FREQ"
- },
- {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -120,7 +114,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.",
- "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_alloc_restriction",
"MetricThreshold": "tma_alloc_restriction > 0.1",
@@ -130,7 +124,7 @@
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots",
"MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.1",
@@ -175,7 +169,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
- "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_detect",
"MetricThreshold": "tma_branch_detect > 0.05",
@@ -185,7 +179,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.05",
@@ -195,7 +189,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
- "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteer",
"MetricThreshold": "tma_branch_resteer > 0.05",
@@ -204,7 +198,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).",
- "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_cisc",
"MetricThreshold": "tma_cisc > 0.05",
@@ -223,7 +217,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.",
- "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_decode",
"MetricThreshold": "tma_decode > 0.05",
@@ -241,7 +235,6 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
@@ -251,7 +244,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_fast_nuke",
"MetricThreshold": "tma_fast_nuke > 0.05",
@@ -260,7 +253,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1",
@@ -270,7 +263,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.15",
@@ -289,7 +282,7 @@
},
{
"BriefDescription": "Counts the number of floating point divide operations per uop.",
- "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@UOPS_RETIRED.FPDIV@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"MetricName": "tma_fpdiv_uops",
"MetricThreshold": "tma_fpdiv_uops > 0.2",
@@ -299,7 +292,7 @@
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / tma_info_core_slots",
"MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.2",
@@ -309,7 +302,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05",
@@ -336,7 +329,7 @@
},
{
"BriefDescription": "Instructions Per Cycle",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / tma_info_core_clks",
"MetricName": "tma_info_core_ipc",
"Unit": "cpu_atom"
},
@@ -348,7 +341,7 @@
},
{
"BriefDescription": "Uops Per Instruction",
- "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
+ "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY",
"MetricName": "tma_info_core_upi",
"Unit": "cpu_atom"
},
@@ -372,13 +365,13 @@
},
{
"BriefDescription": "Ratio of all branches which mispredict",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_branch_mispredict_ratio",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Ratio between Mispredicted branches and unknown branches",
- "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
+ "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BACLEARS.ANY",
"MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio",
"Unit": "cpu_atom"
},
@@ -396,61 +389,61 @@
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_ipbranch",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL",
"MetricName": "tma_info_inst_mix_ipcall",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Far Branch",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)",
"MetricName": "tma_info_inst_mix_ipfarbranch",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Load",
- "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_inst_mix_ipload",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)",
"MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN",
"MetricName": "tma_info_inst_mix_ipmisp_cond_taken",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT",
"MetricName": "tma_info_inst_mix_ipmisp_indirect",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired return Branch Misprediction",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN",
"MetricName": "tma_info_inst_mix_ipmisp_ret",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per retired Branch Misprediction",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_ipmispredict",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Instructions per Store",
- "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES",
"MetricName": "tma_info_inst_mix_ipstore",
"Unit": "cpu_atom"
},
@@ -486,19 +479,19 @@
},
{
"BriefDescription": "Cycle cost per DRAM hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Cycle cost per L2 hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_LOAD_UOPS_RETIRED.L2_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Cycle cost per LLC hit",
- "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_LOAD_UOPS_RETIRED.L3_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit",
"Unit": "cpu_atom"
},
@@ -510,7 +503,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -530,7 +523,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
- "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05",
@@ -539,7 +532,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.",
- "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1",
@@ -548,7 +541,6 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
@@ -558,7 +550,6 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -577,7 +568,7 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.05",
@@ -587,7 +578,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
- "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_mem_scheduler",
"MetricThreshold": "tma_mem_scheduler > 0.1",
@@ -596,7 +587,7 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.",
- "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)",
+ "MetricExpr": "min(tma_backend_bound, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)",
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2",
@@ -615,7 +606,7 @@
},
{
"BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
- "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@UOPS_RETIRED.MS@ / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_ms_uops",
"MetricThreshold": "tma_ms_uops > 0.05",
@@ -626,7 +617,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
- "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_non_mem_scheduler",
"MetricThreshold": "tma_non_mem_scheduler > 0.1",
@@ -635,7 +626,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).",
- "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_nuke",
"MetricThreshold": "tma_nuke > 0.05",
@@ -644,7 +635,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.",
- "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_other_fb",
"MetricThreshold": "tma_other_fb > 0.05",
@@ -653,7 +644,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.",
- "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.OTHER_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_other_l1",
"MetricThreshold": "tma_other_l1 > 0.05",
@@ -689,7 +680,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.",
- "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_predecode",
"MetricThreshold": "tma_predecode > 0.05",
@@ -698,7 +689,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
- "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_register",
"MetricThreshold": "tma_register > 0.1",
@@ -707,7 +698,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
- "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_reorder_buffer",
"MetricThreshold": "tma_reorder_buffer > 0.1",
@@ -728,7 +719,7 @@
{
"BriefDescription": "Counts the number of issue slots that result in retirement slots.",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / tma_info_core_slots",
"MetricGroup": "Default;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.75",
@@ -747,7 +738,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
- "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots",
+ "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_serialization",
"MetricThreshold": "tma_serialization > 0.1",
@@ -774,7 +765,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.",
- "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_hit",
"MetricThreshold": "tma_stlb_hit > 0.05",
@@ -783,7 +774,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.",
- "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.PGWALK_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_miss",
"MetricThreshold": "tma_stlb_miss > 0.05",
@@ -801,8 +792,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
+ "MetricExpr": "cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.05",
@@ -810,6 +800,13 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Uncore frequency per die [GHZ]",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricGroup": "SoC",
+ "MetricName": "UNCORE_FREQ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_0@ + cpu_core@UOPS_DISPATCHED.PORT_1@ + cpu_core@UOPS_DISPATCHED.PORT_5_11@ + cpu_core@UOPS_DISPATCHED.PORT_6@) / (5 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
@@ -874,7 +871,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
- "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
+ "MetricExpr": "cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks + tma_unknown_branches",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteers",
"MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -904,7 +901,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -926,7 +922,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
@@ -947,7 +942,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
- "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@ARITH.DIV_ACTIVE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_divider",
"MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -957,7 +952,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
@@ -978,7 +972,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_dsb_switches",
"MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1018,7 +1012,7 @@
},
{
"BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
- "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.FB_FULL@ / tma_info_thread_clks",
"MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
@@ -1153,7 +1147,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
- "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1163,7 +1157,6 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -1172,7 +1165,7 @@
},
{
"BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_NTAKEN",
"MetricGroup": "Bad;BrMispredicts",
"MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
"MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200",
@@ -1180,7 +1173,7 @@
},
{
"BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN",
"MetricGroup": "Bad;BrMispredicts",
"MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
"MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200",
@@ -1196,7 +1189,7 @@
},
{
"BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RET",
"MetricGroup": "Bad;BrMispredicts",
"MetricName": "tma_info_bad_spec_ipmisp_ret",
"MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500",
@@ -1204,7 +1197,7 @@
},
{
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BadSpec;BrMispredicts",
"MetricName": "tma_info_bad_spec_ipmispredict",
"MetricThreshold": "tma_info_bad_spec_ipmispredict < 200",
@@ -1212,7 +1205,6 @@
},
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
"MetricGroup": "Cor;SMT",
"MetricName": "tma_info_botlnk_l0_core_bound_likely",
@@ -1221,7 +1213,6 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
"MetricGroup": "DSBmiss;Fed;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_misses",
@@ -1231,7 +1222,6 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -1241,7 +1231,6 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
"MetricName": "tma_info_bottleneck_big_code",
@@ -1260,7 +1249,6 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
"MetricGroup": "Fed;FetchBW;Frontend",
"MetricName": "tma_info_bottleneck_instruction_fetch_bw",
@@ -1269,7 +1257,6 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -1279,7 +1266,6 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_info_bottleneck_memory_data_tlbs",
@@ -1289,7 +1275,6 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
"MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
"MetricName": "tma_info_bottleneck_memory_latency",
@@ -1299,7 +1284,6 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bottleneck_mispredictions",
@@ -1316,14 +1300,14 @@
},
{
"BriefDescription": "Fraction of branches that are non-taken conditionals",
- "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_NTAKEN@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
"MetricName": "tma_info_branches_cond_nt",
"Unit": "cpu_core"
},
{
"BriefDescription": "Fraction of branches that are taken conditionals",
- "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_TAKEN@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;Branches;CodeGen;PGO",
"MetricName": "tma_info_branches_cond_tk",
"Unit": "cpu_core"
@@ -1351,7 +1335,7 @@
},
{
"BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_core_core_clks",
"MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
"MetricName": "tma_info_core_coreipc",
"Unit": "cpu_core"
@@ -1373,14 +1357,14 @@
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
- "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)",
"MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "tma_info_core_ilp",
"Unit": "cpu_core"
},
{
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@UOPS_ISSUED.ANY@",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_frontend_dsb_coverage",
"MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35",
@@ -1389,28 +1373,28 @@
},
{
"BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
- "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
+ "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
"MetricGroup": "DSBmiss",
"MetricName": "tma_info_frontend_dsb_switch_cost",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average number of Uops issued by front-end when it issued something",
- "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
+ "MetricExpr": "cpu_core@UOPS_ISSUED.ANY@ / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
"MetricGroup": "Fed;FetchBW",
"MetricName": "tma_info_frontend_fetch_upc",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L1 instruction cache misses",
- "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
+ "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
"MetricGroup": "Fed;FetchLat;IcMiss",
"MetricName": "tma_info_frontend_icache_miss_latency",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FRONTEND_RETIRED.ANY_DSB_MISS",
"MetricGroup": "DSBmiss;Fed",
"MetricName": "tma_info_frontend_ipdsb_miss_ret",
"MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50",
@@ -1439,14 +1423,14 @@
},
{
"BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
- "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@",
+ "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@UOPS_ISSUED.ANY@",
"MetricGroup": "Fed;LSD",
"MetricName": "tma_info_frontend_lsd_coverage",
"Unit": "cpu_core"
},
{
"BriefDescription": "Branch instructions per taken branch.",
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;PGO",
"MetricName": "tma_info_inst_mix_bptkbranch",
"Unit": "cpu_core"
@@ -1461,7 +1445,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
"MetricThreshold": "tma_info_inst_mix_iparith < 10",
@@ -1470,7 +1454,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx128",
"MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
@@ -1479,7 +1463,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx256",
"MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
@@ -1488,7 +1472,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_dp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
@@ -1497,7 +1481,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_sp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
@@ -1506,7 +1490,7 @@
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": "Branches;Fed;InsType",
"MetricName": "tma_info_inst_mix_ipbranch",
"MetricThreshold": "tma_info_inst_mix_ipbranch < 8",
@@ -1514,7 +1498,7 @@
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_CALL",
"MetricGroup": "Branches;Fed;PGO",
"MetricName": "tma_info_inst_mix_ipcall",
"MetricThreshold": "tma_info_inst_mix_ipcall < 200",
@@ -1522,7 +1506,7 @@
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_ipflop",
"MetricThreshold": "tma_info_inst_mix_ipflop < 10",
@@ -1530,7 +1514,7 @@
},
{
"BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_LOADS",
"MetricGroup": "InsType",
"MetricName": "tma_info_inst_mix_ipload",
"MetricThreshold": "tma_info_inst_mix_ipload < 3",
@@ -1538,7 +1522,7 @@
},
{
"BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_STORES",
"MetricGroup": "InsType",
"MetricName": "tma_info_inst_mix_ipstore",
"MetricThreshold": "tma_info_inst_mix_ipstore < 8",
@@ -1546,7 +1530,7 @@
},
{
"BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
- "MetricExpr": "INST_RETIRED.ANY / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
"MetricGroup": "Prefetches",
"MetricName": "tma_info_inst_mix_ipswpf",
"MetricThreshold": "tma_info_inst_mix_ipswpf < 100",
@@ -1554,7 +1538,7 @@
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
"MetricName": "tma_info_inst_mix_iptb",
"MetricThreshold": "tma_info_inst_mix_iptb < 13",
@@ -1654,14 +1638,14 @@
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
- "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY",
"MetricGroup": "Mem;MemoryBound;MemoryLat",
"MetricName": "tma_info_memory_load_miss_real_latency",
"Unit": "cpu_core"
},
{
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
- "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES",
"MetricGroup": "Mem;MemoryBW;MemoryBound",
"MetricName": "tma_info_memory_mlp",
"PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
@@ -1669,28 +1653,28 @@
},
{
"BriefDescription": "Average Parallel L2 cache miss data reads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
"MetricGroup": "Memory_BW;Offcore",
"MetricName": "tma_info_memory_oro_data_l2_mlp",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / OFFCORE_REQUESTS.DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
"MetricName": "tma_info_memory_oro_load_l2_miss_latency",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
"MetricGroup": "Memory_BW;Offcore",
"MetricName": "tma_info_memory_oro_load_l2_mlp",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
- "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
"MetricGroup": "Memory_Lat;Offcore",
"MetricName": "tma_info_memory_oro_load_l3_miss_latency",
"Unit": "cpu_core"
@@ -1754,14 +1738,14 @@
},
{
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
- "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instructions per a microcode Assist invocation",
- "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
"MetricGroup": "Pipeline;Ret;Retire",
"MetricName": "tma_info_pipeline_ipassist",
"MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
@@ -1777,7 +1761,7 @@
},
{
"BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
- "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+ "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
"MetricGroup": "Pipeline;Ret",
"MetricName": "tma_info_pipeline_strings_cycles",
"MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1",
@@ -1792,7 +1776,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
"MetricGroup": "HPC;Summary",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_core"
@@ -1815,7 +1799,7 @@
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
- "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
"MetricGroup": "Branches;OS",
"MetricName": "tma_info_system_ipfarbranch",
"MetricThreshold": "tma_info_system_ipfarbranch < 1e6",
@@ -1838,7 +1822,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu_core@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@",
+ "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
@@ -1846,6 +1830,7 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -1854,6 +1839,7 @@
},
{
"BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL",
"MetricGroup": "Mem;SoC",
"MetricName": "tma_info_system_mem_request_latency",
@@ -1896,7 +1882,7 @@
},
{
"BriefDescription": "The ratio of Executed- by Issued-Uops",
- "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / UOPS_ISSUED.ANY",
"MetricGroup": "Cor;Pipeline",
"MetricName": "tma_info_thread_execute_per_issue",
"PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.",
@@ -1904,7 +1890,7 @@
},
{
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
- "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_thread_clks",
"MetricGroup": "Ret;Summary",
"MetricName": "tma_info_thread_ipc",
"Unit": "cpu_core"
@@ -1971,7 +1957,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
- "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1991,7 +1977,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
@@ -2002,7 +1987,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -2023,7 +2007,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
- "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@DECODE.LCP@ / tma_info_thread_clks",
"MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
"MetricName": "tma_lcp",
"MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -2044,7 +2028,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_2_3_10@ / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
"MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -2063,7 +2047,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@ / tma_info_thread_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
"MetricName": "tma_load_stlb_miss",
"MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2072,7 +2056,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2135,6 +2118,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2144,7 +2128,6 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
@@ -2154,7 +2137,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots",
+ "MetricExpr": "cpu_core@UOPS_RETIRED.MS@ / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
"MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -2224,7 +2207,6 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2245,7 +2227,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_0@ / tma_info_core_core_clks",
"MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_0",
"MetricThreshold": "tma_port_0 > 0.6",
@@ -2255,7 +2237,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_1@ / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_1",
"MetricThreshold": "tma_port_1 > 0.6",
@@ -2265,7 +2247,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
- "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
"MetricName": "tma_port_6",
"MetricThreshold": "tma_port_6 > 0.6",
@@ -2295,7 +2277,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2305,7 +2287,8 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2315,7 +2298,8 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2337,7 +2321,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
- "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -2347,7 +2331,7 @@
},
{
"BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.",
- "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group",
"MetricName": "tma_shuffles",
"MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -2356,7 +2340,8 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
"MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -2376,8 +2361,7 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
- "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_STORES@ / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
"MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2397,7 +2381,7 @@
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
- "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@EXE_ACTIVITY.BOUND_ON_STORES@ / tma_info_thread_clks",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_store_bound",
"MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2407,7 +2391,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
@@ -2447,7 +2430,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
- "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
+ "MetricExpr": "cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@ / tma_info_core_core_clks",
"MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
"MetricName": "tma_store_stlb_miss",
"MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2466,7 +2449,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
- "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks",
"MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
"MetricName": "tma_unknown_branches",
"MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
index c150c14ac6..a35edf7d86 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -195,7 +195,6 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
@@ -457,7 +456,6 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
@@ -466,7 +464,6 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -683,7 +680,6 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/cache.json b/tools/perf/pmu-events/arch/x86/amdzen4/cache.json
index ecbe9660b2..e6d710cf3c 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen4/cache.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen4/cache.json
@@ -676,6 +676,10 @@
"EventCode": "0xac",
"BriefDescription": "Average sampled latency when data is sourced from DRAM in the same NUMA node.",
"UMask": "0x01",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -683,6 +687,10 @@
"EventCode": "0xac",
"BriefDescription": "Average sampled latency when data is sourced from DRAM in a different NUMA node.",
"UMask": "0x02",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -690,6 +698,10 @@
"EventCode": "0xac",
"BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in the same NUMA node.",
"UMask": "0x04",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -697,6 +709,10 @@
"EventCode": "0xac",
"BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in a different NUMA node.",
"UMask": "0x08",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -704,6 +720,10 @@
"EventCode": "0xac",
"BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in the same NUMA node.",
"UMask": "0x10",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -711,6 +731,10 @@
"EventCode": "0xac",
"BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in a different NUMA node.",
"UMask": "0x20",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -718,6 +742,10 @@
"EventCode": "0xac",
"BriefDescription": "Average sampled latency from all data sources.",
"UMask": "0x3f",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -725,6 +753,10 @@
"EventCode": "0xad",
"BriefDescription": "L3 cache fill requests sourced from DRAM in the same NUMA node.",
"UMask": "0x01",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -732,6 +764,10 @@
"EventCode": "0xad",
"BriefDescription": "L3 cache fill requests sourced from DRAM in a different NUMA node.",
"UMask": "0x02",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -739,6 +775,10 @@
"EventCode": "0xad",
"BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in the same NUMA node.",
"UMask": "0x04",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -746,6 +786,10 @@
"EventCode": "0xad",
"BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in a different NUMA node.",
"UMask": "0x08",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -753,6 +797,10 @@
"EventCode": "0xad",
"BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in the same NUMA node.",
"UMask": "0x10",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -760,6 +808,10 @@
"EventCode": "0xad",
"BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in a different NUMA node.",
"UMask": "0x20",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
},
{
@@ -767,6 +819,10 @@
"EventCode": "0xad",
"BriefDescription": "L3 cache fill requests sourced from all data sources.",
"UMask": "0x3f",
+ "EnAllCores": "0x1",
+ "EnAllSlices": "0x1",
+ "SliceId": "0x3",
+ "ThreadMask": "0x3",
"Unit": "L3PMC"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json
new file mode 100644
index 0000000000..55263e5e4f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json
@@ -0,0 +1,101 @@
+[
+ {
+ "EventName": "umc_mem_clk",
+ "PublicDescription": "Number of memory clock cycles.",
+ "EventCode": "0x00",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_act_cmd.all",
+ "PublicDescription": "Number of ACTIVATE commands sent.",
+ "EventCode": "0x05",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_act_cmd.rd",
+ "PublicDescription": "Number of ACTIVATE commands sent for reads.",
+ "EventCode": "0x05",
+ "RdWrMask": "0x1",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_act_cmd.wr",
+ "PublicDescription": "Number of ACTIVATE commands sent for writes.",
+ "EventCode": "0x05",
+ "RdWrMask": "0x2",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_pchg_cmd.all",
+ "PublicDescription": "Number of PRECHARGE commands sent.",
+ "EventCode": "0x06",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_pchg_cmd.rd",
+ "PublicDescription": "Number of PRECHARGE commands sent for reads.",
+ "EventCode": "0x06",
+ "RdWrMask": "0x1",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_pchg_cmd.wr",
+ "PublicDescription": "Number of PRECHARGE commands sent for writes.",
+ "EventCode": "0x06",
+ "RdWrMask": "0x2",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_cas_cmd.all",
+ "PublicDescription": "Number of CAS commands sent.",
+ "EventCode": "0x0a",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_cas_cmd.rd",
+ "PublicDescription": "Number of CAS commands sent for reads.",
+ "EventCode": "0x0a",
+ "RdWrMask": "0x1",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_cas_cmd.wr",
+ "PublicDescription": "Number of CAS commands sent for writes.",
+ "EventCode": "0x0a",
+ "RdWrMask": "0x2",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_data_slot_clks.all",
+ "PublicDescription": "Number of clocks used by the data bus.",
+ "EventCode": "0x14",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_data_slot_clks.rd",
+ "PublicDescription": "Number of clocks used by the data bus for reads.",
+ "EventCode": "0x14",
+ "RdWrMask": "0x1",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ },
+ {
+ "EventName": "umc_data_slot_clks.wr",
+ "PublicDescription": "Number of clocks used by the data bus for writes.",
+ "EventCode": "0x14",
+ "RdWrMask": "0x2",
+ "PerPkg": "1",
+ "Unit": "UMCPMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json
index 5e6a793acf..96e06401c6 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json
@@ -330,5 +330,89 @@
"MetricGroup": "data_fabric",
"PerPkg": "1",
"ScaleUnit": "6.103515625e-5MiB"
+ },
+ {
+ "MetricName": "umc_data_bus_utilization",
+ "BriefDescription": "Memory controller data bus utilization.",
+ "MetricExpr": "d_ratio(umc_data_slot_clks.all / 2, umc_mem_clk)",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1",
+ "ScaleUnit": "100%"
+ },
+ {
+ "MetricName": "umc_cas_cmd_rate",
+ "BriefDescription": "Memory controller CAS command rate.",
+ "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1"
+ },
+ {
+ "MetricName": "umc_cas_cmd_read_ratio",
+ "BriefDescription": "Ratio of memory controller CAS commands for reads.",
+ "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1",
+ "ScaleUnit": "100%"
+ },
+ {
+ "MetricName": "umc_cas_cmd_write_ratio",
+ "BriefDescription": "Ratio of memory controller CAS commands for writes.",
+ "MetricExpr": "d_ratio(umc_cas_cmd.wr, umc_cas_cmd.all)",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1",
+ "ScaleUnit": "100%"
+ },
+ {
+ "MetricName": "umc_mem_read_bandwidth",
+ "BriefDescription": "Estimated memory read bandwidth.",
+ "MetricExpr": "(umc_cas_cmd.rd * 64) / 1e6 / duration_time",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "MetricName": "umc_mem_write_bandwidth",
+ "BriefDescription": "Estimated memory write bandwidth.",
+ "MetricExpr": "(umc_cas_cmd.wr * 64) / 1e6 / duration_time",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "MetricName": "umc_mem_bandwidth",
+ "BriefDescription": "Estimated combined memory bandwidth.",
+ "MetricExpr": "(umc_cas_cmd.all * 64) / 1e6 / duration_time",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1",
+ "ScaleUnit": "1MB/s"
+ },
+ {
+ "MetricName": "umc_cas_cmd_read_ratio",
+ "BriefDescription": "Ratio of memory controller CAS commands for reads.",
+ "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1",
+ "ScaleUnit": "100%"
+ },
+ {
+ "MetricName": "umc_cas_cmd_rate",
+ "BriefDescription": "Memory controller CAS command rate.",
+ "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1"
+ },
+ {
+ "MetricName": "umc_activate_cmd_rate",
+ "BriefDescription": "Memory controller ACTIVATE command rate.",
+ "MetricExpr": "d_ratio(umc_act_cmd.all * 1000, umc_mem_clk)",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1"
+ },
+ {
+ "MetricName": "umc_precharge_cmd_rate",
+ "BriefDescription": "Memory controller PRECHARGE command rate.",
+ "MetricExpr": "d_ratio(umc_pchg_cmd.all * 1000, umc_mem_clk)",
+ "MetricGroup": "memory_controller",
+ "PerPkg": "1"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 84c132af3d..8bc6c07078 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -1863,6 +1863,12 @@
"ScaleUnit": "1GHz"
},
{
+ "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
+ "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+ "MetricName": "upi_data_receive_bw",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
"MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
"MetricName": "upi_data_transmit_bw",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
index 4a9d211e9d..1bdefaf962 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
@@ -23,27 +23,48 @@
"UMask": "0x10"
},
{
- "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
{
- "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
{
- "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
{
+ "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V0",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V2",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
+ },
+ {
"BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
index 6dcf3b763a..1f8200fb89 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
@@ -1,21 +1,5 @@
[
{
- "BriefDescription": "AMX retired arithmetic BF16 operations.",
- "EventCode": "0xce",
- "EventName": "AMX_OPS_RETIRED.BF16",
- "PublicDescription": "Number of AMX-based retired arithmetic bfloat16 (BF16) floating-point operations. Counts TDPBF16PS FP instructions. SW to use operation multiplier of 4",
- "SampleAfterValue": "1000003",
- "UMask": "0x2"
- },
- {
- "BriefDescription": "AMX retired arithmetic integer 8-bit operations.",
- "EventCode": "0xce",
- "EventName": "AMX_OPS_RETIRED.INT8",
- "PublicDescription": "Number of AMX-based retired arithmetic integer operations of 8-bit width source operands. Counts TDPB[SS,UU,US,SU]D instructions. SW should use operation multiplier of 8.",
- "SampleAfterValue": "1000003",
- "UMask": "0x1"
- },
- {
"BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE",
"CounterMask": "1",
"Deprecated": "1",
@@ -505,7 +489,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
+ "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).",
"EventCode": "0xad",
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
index 09d840c7da..65d088556b 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
@@ -4825,11 +4825,11 @@
"Unit": "M3UPI"
},
{
- "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bouncable)",
+ "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bounceable)",
"EventCode": "0x47",
"EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC",
"PerPkg": "1",
- "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress",
+ "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress",
"UMask": "0x1",
"Unit": "MDF"
},
@@ -4861,11 +4861,11 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bouncable)",
+ "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bounceable)",
"EventCode": "0x47",
"EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC",
"PerPkg": "1",
- "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress",
+ "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress",
"UMask": "0x4",
"Unit": "MDF"
},
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json
index 557080b74e..0761980c34 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json
@@ -1186,6 +1186,36 @@
"Unit": "IIO"
},
{
+ "BriefDescription": ": IOTLB Hits to a 1G Page",
+ "EventCode": "0x40",
+ "EventName": "UNC_IIO_IOMMU0.1G_HITS",
+ "PerPkg": "1",
+ "PortMask": "0x0000",
+ "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.",
+ "UMask": "0x10",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": ": IOTLB Hits to a 2M Page",
+ "EventCode": "0x40",
+ "EventName": "UNC_IIO_IOMMU0.2M_HITS",
+ "PerPkg": "1",
+ "PortMask": "0x0000",
+ "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.",
+ "UMask": "0x8",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": ": IOTLB Hits to a 4K Page",
+ "EventCode": "0x40",
+ "EventName": "UNC_IIO_IOMMU0.4K_HITS",
+ "PerPkg": "1",
+ "PortMask": "0x0000",
+ "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.",
+ "UMask": "0x4",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": ": Context cache hits",
"EventCode": "0x40",
"EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
index e98602c667..71d78a7841 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -1847,6 +1847,12 @@
"ScaleUnit": "1GHz"
},
{
+ "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
+ "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+ "MetricName": "upi_data_receive_bw",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
"MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
"MetricName": "upi_data_transmit_bw",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/other.json b/tools/perf/pmu-events/arch/x86/icelakex/other.json
index 63d5faf2fc..11810daaf1 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/other.json
@@ -19,7 +19,7 @@
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
"EventCode": "0x28",
"EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture). This includes high current AVX 512-bit instructions.",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.",
"SampleAfterValue": "200003",
"UMask": "0x20"
},
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
index 176e5ef2a2..45ee6bceba 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
@@ -519,7 +519,7 @@
"BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
"EventCode": "0x5e",
"EventName": "RS_EVENTS.EMPTY_CYCLES",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
index f87ea3f66d..a066a009c5 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
@@ -38,7 +38,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.CLFLUSH",
"PerPkg": "1",
- "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x80",
"Unit": "IRP"
},
@@ -65,7 +65,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.WBMTOI",
"PerPkg": "1",
- "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x40",
"Unit": "IRP"
},
@@ -454,7 +454,7 @@
"EventCode": "0x11",
"EventName": "UNC_I_TRANSACTIONS.WRITES",
"PerPkg": "1",
- "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Trackes only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.",
+ "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Tracks only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.",
"UMask": "0x2",
"Unit": "IRP"
},
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index e571683f59..4d1deed443 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -7,7 +7,7 @@ GenuineIntel-6-56,v11,broadwellde,core
GenuineIntel-6-4F,v22,broadwellx,core
GenuineIntel-6-55-[56789ABCDEF],v1.20,cascadelakex,core
GenuineIntel-6-9[6C],v1.04,elkhartlake,core
-GenuineIntel-6-CF,v1.01,emeraldrapids,core
+GenuineIntel-6-CF,v1.02,emeraldrapids,core
GenuineIntel-6-5[CF],v13,goldmont,core
GenuineIntel-6-7A,v1.01,goldmontplus,core
GenuineIntel-6-B6,v1.00,grandridge,core
@@ -15,7 +15,7 @@ GenuineIntel-6-A[DE],v1.01,graniterapids,core
GenuineIntel-6-(3C|45|46),v33,haswell,core
GenuineIntel-6-3F,v28,haswellx,core
GenuineIntel-6-7[DE],v1.19,icelake,core
-GenuineIntel-6-6[AC],v1.21,icelakex,core
+GenuineIntel-6-6[AC],v1.23,icelakex,core
GenuineIntel-6-3A,v24,ivybridge,core
GenuineIntel-6-3E,v24,ivytown,core
GenuineIntel-6-2D,v24,jaketown,core
@@ -26,7 +26,7 @@ GenuineIntel-6-1[AEF],v4,nehalemep,core
GenuineIntel-6-2E,v4,nehalemex,core
GenuineIntel-6-A7,v1.01,rocketlake,core
GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-8F,v1.16,sapphirerapids,core
+GenuineIntel-6-8F,v1.17,sapphirerapids,core
GenuineIntel-6-AF,v1.00,sierraforest,core
GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
index 0c880e4156..27433fc15e 100644
--- a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
@@ -985,7 +985,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
- "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@",
+ "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
index 4a9d211e9d..1bdefaf962 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
@@ -23,27 +23,48 @@
"UMask": "0x10"
},
{
- "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
{
- "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
{
- "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+ "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
{
+ "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V0",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]",
+ "EventCode": "0xb3",
+ "EventName": "FP_ARITH_DISPATCHED.V2",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4"
+ },
+ {
"BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
index 6dcf3b763a..2cfe814d20 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
@@ -505,7 +505,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
+ "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).",
"EventCode": "0xad",
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index 06c6d67cb7..56e54babcc 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -400,7 +400,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -421,7 +420,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_data_sharing",
@@ -449,7 +447,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
@@ -656,7 +653,6 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -699,7 +695,6 @@
},
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
"MetricGroup": "Cor;SMT",
"MetricName": "tma_info_botlnk_l0_core_bound_likely",
@@ -707,7 +702,6 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
"MetricGroup": "DSBmiss;Fed;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_misses",
@@ -716,7 +710,6 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -725,7 +718,6 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
"MetricName": "tma_info_bottleneck_big_code",
@@ -742,7 +734,6 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
"MetricGroup": "Fed;FetchBW;Frontend",
"MetricName": "tma_info_bottleneck_instruction_fetch_bw",
@@ -750,7 +741,6 @@
},
{
"BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
"MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
"MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -759,7 +749,6 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_info_bottleneck_memory_data_tlbs",
@@ -768,7 +757,6 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
"MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
"MetricName": "tma_info_bottleneck_memory_latency",
@@ -777,7 +765,6 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bottleneck_mispredictions",
@@ -1301,6 +1288,7 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -1455,7 +1443,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
@@ -1465,7 +1452,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1538,7 +1524,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1596,6 +1581,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -1604,7 +1590,6 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_memory_operations",
@@ -1676,7 +1661,6 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1758,6 +1742,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -1767,6 +1752,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -1822,6 +1808,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -1840,7 +1827,6 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1868,7 +1854,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
@@ -1965,6 +1950,12 @@
"ScaleUnit": "1GHz"
},
{
+ "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
+ "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+ "MetricName": "upi_data_receive_bw",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
"MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
"MetricName": "upi_data_transmit_bw",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
index 09d840c7da..65d088556b 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
@@ -4825,11 +4825,11 @@
"Unit": "M3UPI"
},
{
- "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bouncable)",
+ "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (AD Bounceable)",
"EventCode": "0x47",
"EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC",
"PerPkg": "1",
- "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress",
+ "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress",
"UMask": "0x1",
"Unit": "MDF"
},
@@ -4861,11 +4861,11 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bouncable)",
+ "BriefDescription": "Number of allocations into the CRS Egress used to queue up requests destined to the mesh (BL Bounceable)",
"EventCode": "0x47",
"EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC",
"PerPkg": "1",
- "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress",
+ "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress",
"UMask": "0x4",
"Unit": "MDF"
},
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json
index 8b5f54fed1..03596db877 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json
@@ -1250,6 +1250,36 @@
"Unit": "IIO"
},
{
+ "BriefDescription": ": IOTLB Hits to a 1G Page",
+ "EventCode": "0x40",
+ "EventName": "UNC_IIO_IOMMU0.1G_HITS",
+ "PerPkg": "1",
+ "PortMask": "0x0000",
+ "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.",
+ "UMask": "0x10",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": ": IOTLB Hits to a 2M Page",
+ "EventCode": "0x40",
+ "EventName": "UNC_IIO_IOMMU0.2M_HITS",
+ "PerPkg": "1",
+ "PortMask": "0x0000",
+ "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.",
+ "UMask": "0x8",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": ": IOTLB Hits to a 4K Page",
+ "EventCode": "0x40",
+ "EventName": "UNC_IIO_IOMMU0.4K_HITS",
+ "PerPkg": "1",
+ "PortMask": "0x0000",
+ "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.",
+ "UMask": "0x4",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": ": Context cache hits",
"EventCode": "0x40",
"EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 4a8f8eeb75..ec3aa5ef00 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -1807,6 +1807,12 @@
"ScaleUnit": "1GHz"
},
{
+ "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
+ "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+ "MetricName": "upi_data_receive_bw",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
"MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
"MetricName": "upi_data_transmit_bw",