diff options
Diffstat (limited to 'tools/perf/arch/arm64')
25 files changed, 1843 insertions, 0 deletions
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build new file mode 100644 index 000000000..a7dd46a5b --- /dev/null +++ b/tools/perf/arch/arm64/Build @@ -0,0 +1,2 @@ +perf-y += util/ +perf-y += tests/ diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile new file mode 100644 index 000000000..fab3095fb --- /dev/null +++ b/tools/perf/arch/arm64/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif +PERF_HAVE_JITDUMP := 1 +PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +HAVE_KVM_STAT_SUPPORT := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/arm64/include/generated/asm +header := $(out)/syscalls.c +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, arm64) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c new file mode 100644 index 000000000..4af0c3a0f --- /dev/null +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <sys/types.h> +#include <regex.h> +#include <stdlib.h> + +struct arm64_annotate { + regex_t call_insn, + jump_insn; +}; + +static int arm64_mov__parse(struct arch *arch __maybe_unused, + struct ins_operands *ops, + struct map_symbol *ms __maybe_unused) +{ + char *s = strchr(ops->raw, ','), *target, *endptr; + + if (s == NULL) + return -1; + + *s = '\0'; + ops->source.raw = strdup(ops->raw); + *s = ','; + + if (ops->source.raw == NULL) + return -1; + + target = ++s; + ops->target.raw = strdup(target); + if (ops->target.raw == NULL) + goto out_free_source; + + ops->target.addr = strtoull(target, &endptr, 16); + if (endptr == target) + goto out_free_target; + + s = strchr(endptr, '<'); + if (s == NULL) + goto out_free_target; + endptr = strchr(s + 1, '>'); + if (endptr == NULL) + goto out_free_target; + + *endptr = '\0'; + *s = ' '; + ops->target.name = strdup(s); + *s = '<'; + *endptr = '>'; + if (ops->target.name == NULL) + goto out_free_target; + + return 0; + +out_free_target: + zfree(&ops->target.raw); +out_free_source: + zfree(&ops->source.raw); + return -1; +} + +static int mov__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); + +static struct ins_ops arm64_mov_ops = { + .parse = arm64_mov__parse, + .scnprintf = mov__scnprintf, +}; + +static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name) +{ + struct arm64_annotate *arm = arch->priv; + struct ins_ops *ops; + regmatch_t match[2]; + + if (!regexec(&arm->jump_insn, name, 2, match, 0)) + ops = &jump_ops; + else if (!regexec(&arm->call_insn, name, 2, match, 0)) + ops = &call_ops; + else if (!strcmp(name, "ret")) + ops = &ret_ops; + else + ops = &arm64_mov_ops; + + arch__associate_ins_ops(arch, name, ops); + return ops; +} + +static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + struct arm64_annotate *arm; + int err; + + if (arch->initialized) + return 0; + + arm = zalloc(sizeof(*arm)); + if (!arm) + return ENOMEM; + + /* bl, blr */ + err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED); + if (err) + goto out_free_arm; + /* b, b.cond, br, cbz/cbnz, tbz/tbnz */ + err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|hs|le|lo|ls|lt|mi|ne|pl|vc|vs)?n?z?$", + REG_EXTENDED); + if (err) + goto out_free_call; + + arch->initialized = true; + arch->priv = arm; + arch->associate_instruction_ops = arm64__associate_instruction_ops; + arch->objdump.comment_char = '/'; + arch->objdump.skip_functions_char = '+'; + return 0; + +out_free_call: + regfree(&arm->call_insn); +out_free_arm: + free(arm); + return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP; +} diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000..459469b72 --- /dev/null +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -0,0 +1,62 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# powerpc script. +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> +# Changed by: Kim Phillips <kim.phillips@arm.com> + +gcc=$1 +hostcc=$2 +incpath=$3 +input=$4 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table_from_c() +{ + local sc nr last_sc + + create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX` + + { + + cat <<-_EoHEADER + #include <stdio.h> + #include "$input" + int main(int argc, char *argv[]) + { + _EoHEADER + + while read sc nr; do + printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", __NR_$sc);" + last_sc=$sc + done + + printf "%s\n" " printf(\"#define SYSCALLTBL_ARM64_MAX_ID %d\\n\", __NR_$last_sc);" + printf "}\n" + + } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c - + + $create_table_exe + + rm -f $create_table_exe +} + +create_table() +{ + echo "static const char *syscalltbl_arm64[] = {" + create_table_from_c + echo "};" +} + +$gcc -E -dM -x c -I $incpath/include/uapi $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -nu \ + |create_table diff --git a/tools/perf/arch/arm64/include/arch-tests.h b/tools/perf/arch/arm64/include/arch-tests.h new file mode 100644 index 000000000..452b3d904 --- /dev/null +++ b/tools/perf/arch/arm64/include/arch-tests.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +extern struct test_suite *arch_tests[]; + +#endif diff --git a/tools/perf/arch/arm64/include/dwarf-regs-table.h b/tools/perf/arch/arm64/include/dwarf-regs-table.h new file mode 100644 index 000000000..177b2855f --- /dev/null +++ b/tools/perf/arch/arm64/include/dwarf-regs-table.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +static const char * const aarch64_regstr_tbl[] = { + "%x0", "%x1", "%x2", "%x3", "%x4", + "%x5", "%x6", "%x7", "%x8", "%x9", + "%x10", "%x11", "%x12", "%x13", "%x14", + "%x15", "%x16", "%x17", "%x18", "%x19", + "%x20", "%x21", "%x22", "%x23", "%x24", + "%x25", "%x26", "%x27", "%x28", "%x29", + "%lr", "%sp", +}; +#endif diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h new file mode 100644 index 000000000..35a3cc775 --- /dev/null +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include <stdlib.h> +#include <linux/types.h> +#define perf_event_arm_regs perf_event_arm64_regs +#include <asm/perf_regs.h> +#undef perf_event_arm_regs + +void perf_regs_load(u64 *regs); + +#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1) +#define PERF_REGS_MAX PERF_REG_ARM64_MAX +#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 + +#define PERF_REG_IP PERF_REG_ARM64_PC +#define PERF_REG_SP PERF_REG_ARM64_SP + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build new file mode 100644 index 000000000..a61c06bdb --- /dev/null +++ b/tools/perf/arch/arm64/tests/Build @@ -0,0 +1,4 @@ +perf-y += regs_load.o +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o + +perf-y += arch-tests.o diff --git a/tools/perf/arch/arm64/tests/arch-tests.c b/tools/perf/arch/arm64/tests/arch-tests.c new file mode 100644 index 000000000..ad16b4f8f --- /dev/null +++ b/tools/perf/arch/arm64/tests/arch-tests.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include "tests/tests.h" +#include "arch-tests.h" + +struct test_suite *arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + &suite__dwarf_unwind, +#endif + NULL, +}; diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c new file mode 100644 index 000000000..46147a483 --- /dev/null +++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include "perf_regs.h" +#include "thread.h" +#include "map.h" +#include "maps.h" +#include "event.h" +#include "debug.h" +#include "tests/tests.h" + +#define STACK_SIZE 8192 + +static int sample_ustack(struct perf_sample *sample, + struct thread *thread, u64 *regs) +{ + struct stack_dump *stack = &sample->user_stack; + struct map *map; + unsigned long sp; + u64 stack_size, *buf; + + buf = malloc(STACK_SIZE); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + sp = (unsigned long) regs[PERF_REG_ARM64_SP]; + + map = maps__find(thread->maps, (u64)sp); + if (!map) { + pr_debug("failed to get stack map\n"); + free(buf); + return -1; + } + + stack_size = map->end - sp; + stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size; + + memcpy(buf, (void *) sp, stack_size); + stack->data = (char *) buf; + stack->size = stack_size; + return 0; +} + +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread) +{ + struct regs_dump *regs = &sample->user_regs; + u64 *buf; + + buf = calloc(1, sizeof(u64) * PERF_REGS_MAX); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + perf_regs_load(buf); + regs->abi = PERF_SAMPLE_REGS_ABI; + regs->regs = buf; + regs->mask = PERF_REGS_MASK; + + return sample_ustack(sample, thread, buf); +} diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S new file mode 100644 index 000000000..d49de40b6 --- /dev/null +++ b/tools/perf/arch/arm64/tests/regs_load.S @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> + +.text +.type perf_regs_load,%function +#define STR_REG(r) str x##r, [x0, 8 * r] +#define LDR_REG(r) ldr x##r, [x0, 8 * r] +#define SP (8 * 31) +#define PC (8 * 32) +SYM_FUNC_START(perf_regs_load) + STR_REG(0) + STR_REG(1) + STR_REG(2) + STR_REG(3) + STR_REG(4) + STR_REG(5) + STR_REG(6) + STR_REG(7) + STR_REG(8) + STR_REG(9) + STR_REG(10) + STR_REG(11) + STR_REG(12) + STR_REG(13) + STR_REG(14) + STR_REG(15) + STR_REG(16) + STR_REG(17) + STR_REG(18) + STR_REG(19) + STR_REG(20) + STR_REG(21) + STR_REG(22) + STR_REG(23) + STR_REG(24) + STR_REG(25) + STR_REG(26) + STR_REG(27) + STR_REG(28) + STR_REG(29) + STR_REG(30) + mov x1, sp + str x1, [x0, #SP] + str x30, [x0, #PC] + LDR_REG(1) + ret +SYM_FUNC_END(perf_regs_load) diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build new file mode 100644 index 000000000..337aa9bdf --- /dev/null +++ b/tools/perf/arch/arm64/util/Build @@ -0,0 +1,14 @@ +perf-y += header.o +perf-y += machine.o +perf-y += perf_regs.o +perf-y += tsc.o +perf-y += pmu.o +perf-y += kvm-stat.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o + +perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ + ../../arm/util/auxtrace.o \ + ../../arm/util/cs-etm.o \ + arm-spe.o mem-events.o hisi-ptt.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c new file mode 100644 index 000000000..d4c234076 --- /dev/null +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> +#include <linux/zalloc.h> +#include <time.h> + +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evsel_config.h" +#include "../../../util/evlist.h" +#include "../../../util/session.h" +#include <internal/lib.h> // page_size +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/arm-spe.h" +#include <tools/libc_compat.h> // reallocarray + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) + +struct arm_spe_recording { + struct auxtrace_record itr; + struct perf_pmu *arm_spe_pmu; + struct evlist *evlist; + int wrapped_cnt; + bool *wrapped; +}; + +static void arm_spe_set_timestamp(struct auxtrace_record *itr, + struct evsel *evsel) +{ + struct arm_spe_recording *ptr; + struct perf_pmu *arm_spe_pmu; + struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG); + u64 user_bits = 0, bit; + + ptr = container_of(itr, struct arm_spe_recording, itr); + arm_spe_pmu = ptr->arm_spe_pmu; + + if (term) + user_bits = term->val.cfg_chg; + + bit = perf_pmu__format_bits(&arm_spe_pmu->format, "ts_enable"); + + /* Skip if user has set it */ + if (bit & user_bits) + return; + + evsel->core.attr.config |= bit; +} + +static size_t +arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct evlist *evlist __maybe_unused) +{ + return ARM_SPE_AUXTRACE_PRIV_SIZE; +} + +static int arm_spe_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct perf_record_auxtrace_info *auxtrace_info, + size_t priv_size) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + + if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->core.nr_mmaps) + return -EINVAL; + + auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; + auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; + + return 0; +} + +static void +arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts, + bool privileged) +{ + /* + * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor + * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for + * unprivileged users. + * + * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for + * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages + * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the + * user is likely to get an error as they exceed their mlock limmit. + */ + + /* + * No size were given to '-S' or '-m,', so go with the default + */ + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + + /* + * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the + * auxtrace mmap area. + */ + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size; + + /* + * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big + * enough to fit the requested snapshot size. + */ + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } +} + +static int arm_spe_recording_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + struct evsel *evsel, *arm_spe_evsel = NULL; + struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; + bool privileged = perf_event_paranoid_check(-1); + struct evsel *tracking_evsel; + int err; + u64 bit; + + sper->evlist = evlist; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == arm_spe_pmu->type) { + if (arm_spe_evsel) { + pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n"); + return -EINVAL; + } + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period; + evsel->needs_auxtrace_mmap = true; + arm_spe_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (!opts->full_auxtrace) + return 0; + + /* + * we are in snapshot mode. + */ + if (opts->auxtrace_snapshot_mode) { + /* + * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with + * default values. + */ + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) + arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged); + + /* + * Snapshot size can't be bigger than the auxtrace area. + */ + if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + + /* + * Something went wrong somewhere - this shouldn't happen. + */ + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + } + + /* We are in full trace mode but '-m,xyz' wasn't specified */ + if (!opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + if (opts->auxtrace_snapshot_mode) + pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME, + opts->auxtrace_snapshot_size); + + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. + */ + evlist__to_front(evlist, arm_spe_evsel); + + /* + * In the case of per-cpu mmaps, sample CPU for AUX event; + * also enable the timestamp tracing for samples correlation. + */ + if (!perf_cpu_map__empty(cpus)) { + evsel__set_sample_bit(arm_spe_evsel, CPU); + arm_spe_set_timestamp(itr, arm_spe_evsel); + } + + /* + * Set this only so that perf report knows that SPE generates memory info. It has no effect + * on the opening of the event or the SPE data produced. + */ + evsel__set_sample_bit(arm_spe_evsel, DATA_SRC); + + /* + * The PHYS_ADDR flag does not affect the driver behaviour, it is used to + * inform that the resulting output's SPE samples contain physical addresses + * where applicable. + */ + bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable"); + if (arm_spe_evsel->core.attr.config & bit) + evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR); + + /* Add dummy event to keep tracking */ + err = parse_event(evlist, "dummy:u"); + if (err) + return err; + + tracking_evsel = evlist__last(evlist); + evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->core.attr.freq = 0; + tracking_evsel->core.attr.sample_period = 1; + + /* In per-cpu case, always need the time of mmap events etc */ + if (!perf_cpu_map__empty(cpus)) { + evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, CPU); + + /* also track task context switch */ + if (!record_opts__no_switch_events(opts)) + tracking_evsel->core.attr.context_switch = 1; + } + + return 0; +} + +static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, + struct record_opts *opts, + const char *str) +{ + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + return 0; +} + +static int arm_spe_snapshot_start(struct auxtrace_record *itr) +{ + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->core.attr.type == ptr->arm_spe_pmu->type) + return evsel__disable(evsel); + } + return -EINVAL; +} + +static int arm_spe_snapshot_finish(struct auxtrace_record *itr) +{ + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->core.attr.type == ptr->arm_spe_pmu->type) + return evsel__enable(evsel); + } + return -EINVAL; +} + +static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx) +{ + bool *wrapped; + int cnt = ptr->wrapped_cnt, new_cnt, i; + + /* + * No need to allocate, so return early. + */ + if (idx < cnt) + return 0; + + /* + * Make ptr->wrapped as big as idx. + */ + new_cnt = idx + 1; + + /* + * Free'ed in arm_spe_recording_free(). + */ + wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool)); + if (!wrapped) + return -ENOMEM; + + /* + * init new allocated values. + */ + for (i = cnt; i < new_cnt; i++) + wrapped[i] = false; + + ptr->wrapped_cnt = new_cnt; + ptr->wrapped = wrapped; + + return 0; +} + +static bool arm_spe_buffer_has_wrapped(unsigned char *buffer, + size_t buffer_size, u64 head) +{ + u64 i, watermark; + u64 *buf = (u64 *)buffer; + size_t buf_size = buffer_size; + + /* + * Defensively handle the case where head might be continually increasing - if its value is + * equal or greater than the size of the ring buffer, then we can safely determine it has + * wrapped around. Otherwise, continue to detect if head might have wrapped. + */ + if (head >= buffer_size) + return true; + + /* + * We want to look the very last 512 byte (chosen arbitrarily) in the ring buffer. + */ + watermark = buf_size - 512; + + /* + * The value of head is somewhere within the size of the ring buffer. This can be that there + * hasn't been enough data to fill the ring buffer yet or the trace time was so long that + * head has numerically wrapped around. To find we need to check if we have data at the + * very end of the ring buffer. We can reliably do this because mmap'ed pages are zeroed + * out and there is a fresh mapping with every new session. + */ + + /* + * head is less than 512 byte from the end of the ring buffer. + */ + if (head > watermark) + watermark = head; + + /* + * Speed things up by using 64 bit transactions (see "u64 *buf" above) + */ + watermark /= sizeof(u64); + buf_size /= sizeof(u64); + + /* + * If we find trace data at the end of the ring buffer, head has been there and has + * numerically wrapped around at least once. + */ + for (i = watermark; i < buf_size; i++) + if (buf[i]) + return true; + + return false; +} + +static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + int err; + bool wrapped; + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + + /* + * Allocate memory to keep track of wrapping if this is the first + * time we deal with this *mm. + */ + if (idx >= ptr->wrapped_cnt) { + err = arm_spe_alloc_wrapped_array(ptr, idx); + if (err) + return err; + } + + /* + * Check to see if *head has wrapped around. If it hasn't only the + * amount of data between *head and *old is snapshot'ed to avoid + * bloating the perf.data file with zeros. But as soon as *head has + * wrapped around the entire size of the AUX ring buffer it taken. + */ + wrapped = ptr->wrapped[idx]; + if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) { + wrapped = true; + ptr->wrapped[idx] = true; + } + + pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", + __func__, idx, (size_t)*old, (size_t)*head, mm->len); + + /* + * No wrap has occurred, we can just use *head and *old. + */ + if (!wrapped) + return 0; + + /* + * *head has wrapped around - adjust *head and *old to pickup the + * entire content of the AUX buffer. + */ + if (*head >= mm->len) { + *old = *head - mm->len; + } else { + *head += mm->len; + *old = *head - mm->len; + } + + return 0; +} + +static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + + return ts.tv_sec ^ ts.tv_nsec; +} + +static void arm_spe_recording_free(struct auxtrace_record *itr) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + + free(sper->wrapped); + free(sper); +} + +struct auxtrace_record *arm_spe_recording_init(int *err, + struct perf_pmu *arm_spe_pmu) +{ + struct arm_spe_recording *sper; + + if (!arm_spe_pmu) { + *err = -ENODEV; + return NULL; + } + + sper = zalloc(sizeof(struct arm_spe_recording)); + if (!sper) { + *err = -ENOMEM; + return NULL; + } + + sper->arm_spe_pmu = arm_spe_pmu; + sper->itr.pmu = arm_spe_pmu; + sper->itr.snapshot_start = arm_spe_snapshot_start; + sper->itr.snapshot_finish = arm_spe_snapshot_finish; + sper->itr.find_snapshot = arm_spe_find_snapshot; + sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options; + sper->itr.recording_options = arm_spe_recording_options; + sper->itr.info_priv_size = arm_spe_info_priv_size; + sper->itr.info_fill = arm_spe_info_fill; + sper->itr.free = arm_spe_recording_free; + sper->itr.reference = arm_spe_reference; + sper->itr.read_finish = auxtrace_record__read_finish; + sper->itr.alignment = 0; + + *err = 0; + return &sper->itr; +} + +struct perf_event_attr +*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu) +{ + struct perf_event_attr *attr; + + attr = zalloc(sizeof(struct perf_event_attr)); + if (!attr) { + pr_err("arm_spe default config cannot allocate a perf_event_attr\n"); + return NULL; + } + + /* + * If kernel driver doesn't advertise a minimum, + * use max allowable by PMSIDR_EL1.INTERVAL + */ + if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu", + &attr->sample_period) != 1) { + pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n"); + attr->sample_period = 4096; + } + + arm_spe_pmu->selectable = true; + arm_spe_pmu->is_uncore = false; + + return attr; +} diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h new file mode 100644 index 000000000..27c981ebe --- /dev/null +++ b/tools/perf/arch/arm64/util/arm64_exception_types.h @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_ARM64_EXCEPTION_TYPES_H +#define ARCH_PERF_ARM64_EXCEPTION_TYPES_H + +/* Per asm/virt.h */ +#define HVC_STUB_ERR 0xbadca11 + +/* Per asm/kvm_asm.h */ +#define ARM_EXCEPTION_IRQ 0 +#define ARM_EXCEPTION_EL1_SERROR 1 +#define ARM_EXCEPTION_TRAP 2 +#define ARM_EXCEPTION_IL 3 +/* The hyp-stub will return this for any kvm_call_hyp() call */ +#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR + +#define kvm_arm_exception_type \ + {ARM_EXCEPTION_IRQ, "IRQ" }, \ + {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \ + {ARM_EXCEPTION_TRAP, "TRAP" }, \ + {ARM_EXCEPTION_IL, "ILLEGAL" }, \ + {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" } + +/* Per asm/esr.h */ +#define ESR_ELx_EC_UNKNOWN (0x00) +#define ESR_ELx_EC_WFx (0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 (0x03) +#define ESR_ELx_EC_CP15_64 (0x04) +#define ESR_ELx_EC_CP14_MR (0x05) +#define ESR_ELx_EC_CP14_LS (0x06) +#define ESR_ELx_EC_FP_ASIMD (0x07) +#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ +#define ESR_ELx_EC_CP14_64 (0x0C) +/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_ILL (0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 (0x11) +#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 (0x15) +#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 (0x18) +#define ESR_ELx_EC_SVE (0x19) +#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +/* Unallocated EC: 0x1b - 0x1E */ +#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW (0x20) +#define ESR_ELx_EC_IABT_CUR (0x21) +#define ESR_ELx_EC_PC_ALIGN (0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW (0x24) +#define ESR_ELx_EC_DABT_CUR (0x25) +#define ESR_ELx_EC_SP_ALIGN (0x26) +/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_FP_EXC32 (0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 (0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR (0x2F) +#define ESR_ELx_EC_BREAKPT_LOW (0x30) +#define ESR_ELx_EC_BREAKPT_CUR (0x31) +#define ESR_ELx_EC_SOFTSTP_LOW (0x32) +#define ESR_ELx_EC_SOFTSTP_CUR (0x33) +#define ESR_ELx_EC_WATCHPT_LOW (0x34) +#define ESR_ELx_EC_WATCHPT_CUR (0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 (0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 (0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX (0x3F) + +#define ECN(x) { ESR_ELx_EC_##x, #x } + +#define kvm_arm_exception_class \ + ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \ + ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \ + ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \ + ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ + ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ + ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + +#endif /* ARCH_PERF_ARM64_EXCEPTION_TYPES_H */ diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c new file mode 100644 index 000000000..917b97d7c --- /dev/null +++ b/tools/perf/arch/arm64/util/dwarf-regs.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2010 Will Deacon, ARM Ltd. + */ + +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <dwarf-regs.h> +#include <linux/ptrace.h> /* for struct user_pt_regs */ +#include <linux/stringify.h> + +struct pt_regs_dwarfnum { + const char *name; + unsigned int dwarfnum; +}; + +#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} +#define GPR_DWARFNUM_NAME(num) \ + {.name = __stringify(%x##num), .dwarfnum = num} +#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} +#define DWARFNUM2OFFSET(index) \ + (index * sizeof((struct user_pt_regs *)0)->regs[0]) + +/* + * Reference: + * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf + */ +static const struct pt_regs_dwarfnum regdwarfnum_table[] = { + GPR_DWARFNUM_NAME(0), + GPR_DWARFNUM_NAME(1), + GPR_DWARFNUM_NAME(2), + GPR_DWARFNUM_NAME(3), + GPR_DWARFNUM_NAME(4), + GPR_DWARFNUM_NAME(5), + GPR_DWARFNUM_NAME(6), + GPR_DWARFNUM_NAME(7), + GPR_DWARFNUM_NAME(8), + GPR_DWARFNUM_NAME(9), + GPR_DWARFNUM_NAME(10), + GPR_DWARFNUM_NAME(11), + GPR_DWARFNUM_NAME(12), + GPR_DWARFNUM_NAME(13), + GPR_DWARFNUM_NAME(14), + GPR_DWARFNUM_NAME(15), + GPR_DWARFNUM_NAME(16), + GPR_DWARFNUM_NAME(17), + GPR_DWARFNUM_NAME(18), + GPR_DWARFNUM_NAME(19), + GPR_DWARFNUM_NAME(20), + GPR_DWARFNUM_NAME(21), + GPR_DWARFNUM_NAME(22), + GPR_DWARFNUM_NAME(23), + GPR_DWARFNUM_NAME(24), + GPR_DWARFNUM_NAME(25), + GPR_DWARFNUM_NAME(26), + GPR_DWARFNUM_NAME(27), + GPR_DWARFNUM_NAME(28), + GPR_DWARFNUM_NAME(29), + REG_DWARFNUM_NAME("%lr", 30), + REG_DWARFNUM_NAME("%sp", 31), + REG_DWARFNUM_END, +}; + +/** + * get_arch_regstr() - lookup register name from it's DWARF register number + * @n: the DWARF register number + * + * get_arch_regstr() returns the name of the register in struct + * regdwarfnum_table from it's DWARF register number. If the register is not + * found in the table, this returns NULL; + */ +const char *get_arch_regstr(unsigned int n) +{ + const struct pt_regs_dwarfnum *roff; + for (roff = regdwarfnum_table; roff->name != NULL; roff++) + if (roff->dwarfnum == n) + return roff->name; + return NULL; +} + +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_dwarfnum *roff; + + for (roff = regdwarfnum_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return DWARFNUM2OFFSET(roff->dwarfnum); + return -EINVAL; +} diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c new file mode 100644 index 000000000..d730666ab --- /dev/null +++ b/tools/perf/arch/arm64/util/header.c @@ -0,0 +1,101 @@ +#include <stdio.h> +#include <stdlib.h> +#include <perf/cpumap.h> +#include <util/cpumap.h> +#include <internal/cpumap.h> +#include <api/fs/fs.h> +#include <errno.h> +#include "debug.h" +#include "header.h" + +#define MIDR "/regs/identification/midr_el1" +#define MIDR_SIZE 19 +#define MIDR_REVISION_MASK 0xf +#define MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) + +static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) +{ + const char *sysfs = sysfs__mountpoint(); + u64 midr = 0; + int cpu; + + if (!sysfs || sz < MIDR_SIZE) + return EINVAL; + + cpus = perf_cpu_map__get(cpus); + + for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { + char path[PATH_MAX]; + FILE *file; + + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, + sysfs, cpus->map[cpu]); + + file = fopen(path, "r"); + if (!file) { + pr_debug("fopen failed for file %s\n", path); + continue; + } + + if (!fgets(buf, MIDR_SIZE, file)) { + fclose(file); + continue; + } + fclose(file); + + /* Ignore/clear Variant[23:20] and + * Revision[3:0] of MIDR + */ + midr = strtoul(buf, NULL, 16); + midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK)); + scnprintf(buf, MIDR_SIZE, "0x%016lx", midr); + /* got midr break loop */ + break; + } + + perf_cpu_map__put(cpus); + + if (!midr) + return EINVAL; + + return 0; +} + +int get_cpuid(char *buf, size_t sz) +{ + struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + int ret; + + if (!cpus) + return EINVAL; + + ret = _get_cpuid(buf, sz, cpus); + + perf_cpu_map__put(cpus); + + return ret; +} + +char *get_cpuid_str(struct perf_pmu *pmu) +{ + char *buf = NULL; + int res; + + if (!pmu || !pmu->cpus) + return NULL; + + buf = malloc(MIDR_SIZE); + if (!buf) + return NULL; + + /* read midr from list of cpus mapped to this pmu */ + res = _get_cpuid(buf, MIDR_SIZE, pmu->cpus); + if (res) { + pr_err("failed to get cpuid string for PMU %s\n", pmu->name); + free(buf); + buf = NULL; + } + + return buf; +} diff --git a/tools/perf/arch/arm64/util/hisi-ptt.c b/tools/perf/arch/arm64/util/hisi-ptt.c new file mode 100644 index 000000000..ba97c8a56 --- /dev/null +++ b/tools/perf/arch/arm64/util/hisi-ptt.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HiSilicon PCIe Trace and Tuning (PTT) support + * Copyright (c) 2022 HiSilicon Technologies Co., Ltd. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> +#include <linux/zalloc.h> +#include <time.h> + +#include <internal/lib.h> // page_size +#include "../../../util/auxtrace.h" +#include "../../../util/cpumap.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" +#include "../../../util/hisi-ptt.h" +#include "../../../util/pmu.h" +#include "../../../util/record.h" +#include "../../../util/session.h" +#include "../../../util/tsc.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) + +struct hisi_ptt_recording { + struct auxtrace_record itr; + struct perf_pmu *hisi_ptt_pmu; + struct evlist *evlist; +}; + +static size_t +hisi_ptt_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct evlist *evlist __maybe_unused) +{ + return HISI_PTT_AUXTRACE_PRIV_SIZE; +} + +static int hisi_ptt_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct perf_record_auxtrace_info *auxtrace_info, + size_t priv_size) +{ + struct hisi_ptt_recording *pttr = + container_of(itr, struct hisi_ptt_recording, itr); + struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu; + + if (priv_size != HISI_PTT_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->core.nr_mmaps) + return -EINVAL; + + auxtrace_info->type = PERF_AUXTRACE_HISI_PTT; + auxtrace_info->priv[0] = hisi_ptt_pmu->type; + + return 0; +} + +static int hisi_ptt_set_auxtrace_mmap_page(struct record_opts *opts) +{ + bool privileged = perf_event_paranoid_check(-1); + + if (!opts->full_auxtrace) + return 0; + + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(16) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for HISI PTT: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + return 0; +} + +static int hisi_ptt_recording_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts) +{ + struct hisi_ptt_recording *pttr = + container_of(itr, struct hisi_ptt_recording, itr); + struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu; + struct evsel *evsel, *hisi_ptt_evsel = NULL; + struct evsel *tracking_evsel; + int err; + + pttr->evlist = evlist; + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == hisi_ptt_pmu->type) { + if (hisi_ptt_evsel) { + pr_err("There may be only one " HISI_PTT_PMU_NAME "x event\n"); + return -EINVAL; + } + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + evsel->needs_auxtrace_mmap = true; + hisi_ptt_evsel = evsel; + opts->full_auxtrace = true; + } + } + + err = hisi_ptt_set_auxtrace_mmap_page(opts); + if (err) + return err; + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. + */ + evlist__to_front(evlist, hisi_ptt_evsel); + evsel__set_sample_bit(hisi_ptt_evsel, TIME); + + /* Add dummy event to keep tracking */ + err = parse_event(evlist, "dummy:u"); + if (err) + return err; + + tracking_evsel = evlist__last(evlist); + evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->core.attr.freq = 0; + tracking_evsel->core.attr.sample_period = 1; + evsel__set_sample_bit(tracking_evsel, TIME); + + return 0; +} + +static u64 hisi_ptt_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static void hisi_ptt_recording_free(struct auxtrace_record *itr) +{ + struct hisi_ptt_recording *pttr = + container_of(itr, struct hisi_ptt_recording, itr); + + free(pttr); +} + +struct auxtrace_record *hisi_ptt_recording_init(int *err, + struct perf_pmu *hisi_ptt_pmu) +{ + struct hisi_ptt_recording *pttr; + + if (!hisi_ptt_pmu) { + *err = -ENODEV; + return NULL; + } + + pttr = zalloc(sizeof(*pttr)); + if (!pttr) { + *err = -ENOMEM; + return NULL; + } + + pttr->hisi_ptt_pmu = hisi_ptt_pmu; + pttr->itr.pmu = hisi_ptt_pmu; + pttr->itr.recording_options = hisi_ptt_recording_options; + pttr->itr.info_priv_size = hisi_ptt_info_priv_size; + pttr->itr.info_fill = hisi_ptt_info_fill; + pttr->itr.free = hisi_ptt_recording_free; + pttr->itr.reference = hisi_ptt_reference; + pttr->itr.read_finish = auxtrace_record__read_finish; + pttr->itr.alignment = 0; + + *err = 0; + return &pttr->itr; +} diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c new file mode 100644 index 000000000..73d18e0ed --- /dev/null +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <memory.h> +#include "../../../util/evsel.h" +#include "../../../util/kvm-stat.h" +#include "arm64_exception_types.h" +#include "debug.h" + +define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type); +define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class); + +const char *kvm_trap_exit_reason = "esr_ec"; +const char *vcpu_id_str = "id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "ret"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + +const char *kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + NULL, +}; + +static void event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->exit_reasons = arm64_exit_reasons; + + /* + * TRAP exceptions carry exception class info in esr_ec field + * and, hence, we need to use a different exit_reasons table to + * properly decode event's est_ec. + */ + if (key->key == ARM_EXCEPTION_TRAP) { + key->key = evsel__intval(evsel, sample, kvm_trap_exit_reason); + key->exit_reasons = arm64_trap_exit_reasons; + } +} + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return !strcmp(evsel->name, kvm_entry_trace); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (!strcmp(evsel->name, kvm_exit_trace)) { + event_get_key(evsel, sample, key); + return true; + } + return false; +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { + .name = "vmexit", + .ops = &exit_events, + }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "arm64"; + return 0; +} diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c new file mode 100644 index 000000000..41c1596e5 --- /dev/null +++ b/tools/perf/arch/arm64/util/machine.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <inttypes.h> +#include <stdio.h> +#include <string.h> +#include "debug.h" +#include "symbol.h" +#include "callchain.h" +#include "record.h" + +void arch__add_leaf_frame_record_opts(struct record_opts *opts) +{ + opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask; +} diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c new file mode 100644 index 000000000..df817d1f9 --- /dev/null +++ b/tools/perf/arch/arm64/util/mem-events.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "map_symbol.h" +#include "mem-events.h" + +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } + +static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("spe-load", "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=1,store_filter=0,min_latency=%u/", "arm_spe_0"), + E("spe-store", "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=0,store_filter=1/", "arm_spe_0"), + E("spe-ldst", "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=1,store_filter=1,min_latency=%u/", "arm_spe_0"), +}; + +static char mem_ev_name[100]; + +struct perf_mem_event *perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + return &perf_mem_events[i]; +} + +char *perf_mem_events__name(int i, char *pmu_name __maybe_unused) +{ + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) + scnprintf(mem_ev_name, sizeof(mem_ev_name), + e->name, perf_mem_events__loads_ldlat); + else /* PERF_MEM_EVENTS__STORE */ + scnprintf(mem_ev_name, sizeof(mem_ev_name), e->name); + + return mem_ev_name; +} diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c new file mode 100644 index 000000000..006692c9b --- /dev/null +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <regex.h> +#include <string.h> +#include <sys/auxv.h> +#include <linux/kernel.h> +#include <linux/zalloc.h> + +#include "../../../perf-sys.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/perf_regs.h" + +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG(x0, PERF_REG_ARM64_X0), + SMPL_REG(x1, PERF_REG_ARM64_X1), + SMPL_REG(x2, PERF_REG_ARM64_X2), + SMPL_REG(x3, PERF_REG_ARM64_X3), + SMPL_REG(x4, PERF_REG_ARM64_X4), + SMPL_REG(x5, PERF_REG_ARM64_X5), + SMPL_REG(x6, PERF_REG_ARM64_X6), + SMPL_REG(x7, PERF_REG_ARM64_X7), + SMPL_REG(x8, PERF_REG_ARM64_X8), + SMPL_REG(x9, PERF_REG_ARM64_X9), + SMPL_REG(x10, PERF_REG_ARM64_X10), + SMPL_REG(x11, PERF_REG_ARM64_X11), + SMPL_REG(x12, PERF_REG_ARM64_X12), + SMPL_REG(x13, PERF_REG_ARM64_X13), + SMPL_REG(x14, PERF_REG_ARM64_X14), + SMPL_REG(x15, PERF_REG_ARM64_X15), + SMPL_REG(x16, PERF_REG_ARM64_X16), + SMPL_REG(x17, PERF_REG_ARM64_X17), + SMPL_REG(x18, PERF_REG_ARM64_X18), + SMPL_REG(x19, PERF_REG_ARM64_X19), + SMPL_REG(x20, PERF_REG_ARM64_X20), + SMPL_REG(x21, PERF_REG_ARM64_X21), + SMPL_REG(x22, PERF_REG_ARM64_X22), + SMPL_REG(x23, PERF_REG_ARM64_X23), + SMPL_REG(x24, PERF_REG_ARM64_X24), + SMPL_REG(x25, PERF_REG_ARM64_X25), + SMPL_REG(x26, PERF_REG_ARM64_X26), + SMPL_REG(x27, PERF_REG_ARM64_X27), + SMPL_REG(x28, PERF_REG_ARM64_X28), + SMPL_REG(x29, PERF_REG_ARM64_X29), + SMPL_REG(lr, PERF_REG_ARM64_LR), + SMPL_REG(sp, PERF_REG_ARM64_SP), + SMPL_REG(pc, PERF_REG_ARM64_PC), + SMPL_REG(vg, PERF_REG_ARM64_VG), + SMPL_REG_END +}; + +/* %xNUM */ +#define SDT_OP_REGEX1 "^(x[1-2]?[0-9]|3[0-1])$" + +/* [sp], [sp, NUM] */ +#define SDT_OP_REGEX2 "^\\[sp(, )?([0-9]+)?\\]$" + +static regex_t sdt_op_regex1, sdt_op_regex2; + +static int sdt_init_op_regex(void) +{ + static int initialized; + int ret = 0; + + if (initialized) + return 0; + + ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED); + if (ret) + goto error; + + ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED); + if (ret) + goto free_regex1; + + initialized = 1; + return 0; + +free_regex1: + regfree(&sdt_op_regex1); +error: + pr_debug4("Regex compilation error.\n"); + return ret; +} + +/* + * SDT marker arguments on Arm64 uses %xREG or [sp, NUM], currently + * support these two formats. + */ +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + int ret, new_len; + regmatch_t rm[5]; + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) { + /* Extract xNUM */ + new_len = 2; /* % NULL */ + new_len += (int)(rm[1].rm_eo - rm[1].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%%%.*s", + (int)(rm[1].rm_eo - rm[1].rm_so), old_op + rm[1].rm_so); + } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) { + /* [sp], [sp, NUM] or [sp,NUM] */ + new_len = 7; /* + ( % s p ) NULL */ + + /* If the argument is [sp], need to fill offset '0' */ + if (rm[2].rm_so == -1) + new_len += 1; + else + new_len += (int)(rm[2].rm_eo - rm[2].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + if (rm[2].rm_so == -1) + scnprintf(*new_op, new_len, "+0(%%sp)"); + else + scnprintf(*new_op, new_len, "+%.*s(%%sp)", + (int)(rm[2].rm_eo - rm[2].rm_so), + old_op + rm[2].rm_so); + } else { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + return SDT_ARG_VALID; +} + +uint64_t arch__user_reg_mask(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_REGS_USER, + .disabled = 1, + .exclude_kernel = 1, + .sample_period = 1, + .sample_regs_user = PERF_REGS_MASK + }; + int fd; + + if (getauxval(AT_HWCAP) & HWCAP_SVE) + attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG); + + /* + * Check if the pmu supports perf extended regs, before + * returning the register mask to sample. + */ + if (attr.sample_regs_user != PERF_REGS_MASK) { + event_attr_init(&attr); + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + return attr.sample_regs_user; + } + } + return PERF_REGS_MASK; +} diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c new file mode 100644 index 000000000..f849b1e88 --- /dev/null +++ b/tools/perf/arch/arm64/util/pmu.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../../../util/cpumap.h" +#include "../../../util/pmu.h" + +const struct pmu_events_table *pmu_events_table__find(void) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!is_pmu_core(pmu->name)) + continue; + + /* + * The cpumap should cover all CPUs. Otherwise, some CPUs may + * not support some events or have different event IDs. + */ + if (pmu->cpus->nr != cpu__max_cpu().cpu) + return NULL; + + return perf_pmu__find_table(pmu); + } + + return NULL; +} diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c new file mode 100644 index 000000000..cc85bd9e7 --- /dev/null +++ b/tools/perf/arch/arm64/util/tsc.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/types.h> + +#include "../../../util/tsc.h" + +u64 rdtsc(void) +{ + u64 val; + + /* + * According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the + * system counter is at least 56 bits wide; from Armv8.6, the counter + * must be 64 bits wide. So the system counter could be less than 64 + * bits wide and it is attributed with the flag 'cap_user_time_short' + * is true. + */ + asm volatile("mrs %0, cntvct_el0" : "=r" (val)); + + return val; +} diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c new file mode 100644 index 000000000..a50941629 --- /dev/null +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <elfutils/libdwfl.h> +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[PERF_REG_ARM64_MAX], dwarf_pc; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_ARM64_##r); \ + val; \ +}) + + dwarf_regs[0] = REG(X0); + dwarf_regs[1] = REG(X1); + dwarf_regs[2] = REG(X2); + dwarf_regs[3] = REG(X3); + dwarf_regs[4] = REG(X4); + dwarf_regs[5] = REG(X5); + dwarf_regs[6] = REG(X6); + dwarf_regs[7] = REG(X7); + dwarf_regs[8] = REG(X8); + dwarf_regs[9] = REG(X9); + dwarf_regs[10] = REG(X10); + dwarf_regs[11] = REG(X11); + dwarf_regs[12] = REG(X12); + dwarf_regs[13] = REG(X13); + dwarf_regs[14] = REG(X14); + dwarf_regs[15] = REG(X15); + dwarf_regs[16] = REG(X16); + dwarf_regs[17] = REG(X17); + dwarf_regs[18] = REG(X18); + dwarf_regs[19] = REG(X19); + dwarf_regs[20] = REG(X20); + dwarf_regs[21] = REG(X21); + dwarf_regs[22] = REG(X22); + dwarf_regs[23] = REG(X23); + dwarf_regs[24] = REG(X24); + dwarf_regs[25] = REG(X25); + dwarf_regs[26] = REG(X26); + dwarf_regs[27] = REG(X27); + dwarf_regs[28] = REG(X28); + dwarf_regs[29] = REG(X29); + dwarf_regs[30] = REG(LR); + dwarf_regs[31] = REG(SP); + + if (!dwfl_thread_state_registers(thread, 0, PERF_REG_ARM64_MAX, + dwarf_regs)) + return false; + + dwarf_pc = REG(PC); + dwfl_thread_state_register_pc(thread, dwarf_pc); + + return true; +} diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c new file mode 100644 index 000000000..871af5992 --- /dev/null +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> + +#ifndef REMOTE_UNWIND_LIBUNWIND +#include <libunwind.h> +#include "perf_regs.h" +#include "../../../util/unwind.h" +#endif +#include "../../../util/debug.h" + +int LIBUNWIND__ARCH_REG_ID(int regnum) +{ + if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX) + return -EINVAL; + + return regnum; +} |